rtime.felk.cvut.cz Git - linux-imx.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Jul 2012 00:57:05 +0000 (17:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Jul 2012 00:57:05 +0000 (17:57 -0700)
Pull GFS2 updates from Steven Whitehouse.

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw:
  GFS2: Eliminate 64-bit divides
  GFS2: Reduce file fragmentation
  GFS2: kernel panic with small gfs2 filesystems - 1 RG
  GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs
  GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve
  GFS2: Add kobject release method
  GFS2: Size seq_file buffer more carefully
  GFS2: Use seq_vprintf for glocks debugfs file
  seq_file: Add seq_vprintf function and export it
  GFS2: Use lvbs for storing rgrp information with mount option
  GFS2: Cache last hash bucket for glock seq_files
  GFS2: Increase buffer size for glocks and glstats debugfs files
  GFS2: Fix error handling when reading an invalid block from the journal
  GFS2: Add "top dir" flag support
  GFS2: Fold quota data into the reservations struct
  GFS2: Extend the life of the reservations

23 files changed:
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/dir.c
fs/gfs2/file.c
fs/gfs2/glock.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lops.c
fs/gfs2/main.c
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/super.c
fs/gfs2/sys.c
fs/gfs2/trace_gfs2.h
fs/gfs2/trans.h
fs/gfs2/util.h
fs/gfs2/xattr.c
fs/seq_file.c
include/linux/gfs2_ondisk.h
include/linux/seq_file.h

index e80a464850c835c976853161c5ad020ba23fa7c8..d6526347d3860eb8269f5ac5fb2f9817220af6b5 100644 (file)
@@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        int alloc_required;
        int error = 0;
-       struct gfs2_qadata *qa = NULL;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        struct page *page;
@@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
 
        if (alloc_required) {
-               qa = gfs2_qadata_get(ip);
-               if (!qa) {
-                       error = -ENOMEM;
-                       goto out_unlock;
-               }
-
                error = gfs2_quota_lock_check(ip);
                if (error)
-                       goto out_alloc_put;
+                       goto out_unlock;
 
                error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
                if (error)
@@ -708,8 +701,6 @@ out_trans_fail:
                gfs2_inplace_release(ip);
 out_qunlock:
                gfs2_quota_unlock(ip);
-out_alloc_put:
-               gfs2_qadata_put(ip);
        }
 out_unlock:
        if (&ip->i_inode == sdp->sd_rindex) {
@@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct buffer_head *dibh;
-       struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
        unsigned int to = from + len;
        int ret;
@@ -878,12 +868,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        brelse(dibh);
 failed:
        gfs2_trans_end(sdp);
-       if (ip->i_res)
+       if (gfs2_mb_reserved(ip))
                gfs2_inplace_release(ip);
-       if (qa) {
+       if (ip->i_res->rs_qa_qd_num)
                gfs2_quota_unlock(ip);
-               gfs2_qadata_put(ip);
-       }
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
index dab54099dd98ecab64a820f168dcf2ff8466ad60..49cd7dd4a9fa882781fa895aff8c19401c170aad 100644 (file)
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (error)
                goto out_rlist;
 
+       if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
+               gfs2_rs_deltree(ip->i_res);
+
        error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
                                 RES_INDIRECT + RES_STATFS + RES_QUOTA,
                                 revokes);
@@ -1045,12 +1048,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
                lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
 
        find_metapath(sdp, lblock, &mp, ip->i_height);
-       if (!gfs2_qadata_get(ip))
-               return -ENOMEM;
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
 
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-               goto out;
+               return error;
 
        while (height--) {
                struct strip_mine sm;
@@ -1064,8 +1068,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 
        gfs2_quota_unhold(ip);
 
-out:
-       gfs2_qadata_put(ip);
        return error;
 }
 
@@ -1167,19 +1169,14 @@ static int do_grow(struct inode *inode, u64 size)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *dibh;
-       struct gfs2_qadata *qa = NULL;
        int error;
        int unstuff = 0;
 
        if (gfs2_is_stuffed(ip) &&
            (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-               qa = gfs2_qadata_get(ip);
-               if (qa == NULL)
-                       return -ENOMEM;
-
                error = gfs2_quota_lock_check(ip);
                if (error)
-                       goto do_grow_alloc_put;
+                       return error;
 
                error = gfs2_inplace_reserve(ip, 1);
                if (error)
@@ -1214,8 +1211,6 @@ do_grow_release:
                gfs2_inplace_release(ip);
 do_grow_qunlock:
                gfs2_quota_unlock(ip);
-do_grow_alloc_put:
-               gfs2_qadata_put(ip);
        }
        return error;
 }
index 8aaeb07a07b52bdcbfad125fd5ecbb2fcbb21213..259b088cfc4c64d5ce434dc5e3a65dc0eb250fd0 100644 (file)
@@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        if (!ht)
                return -ENOMEM;
 
-       if (!gfs2_qadata_get(dip)) {
-               error = -ENOMEM;
-               goto out;
-       }
-
        error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-               goto out_put;
+               goto out;
 
        /*  Count the number of leaves  */
        bh = leaf_bh;
@@ -1942,8 +1937,6 @@ out_rg_gunlock:
 out_rlist:
        gfs2_rlist_free(&rlist);
        gfs2_quota_unhold(dip);
-out_put:
-       gfs2_qadata_put(dip);
 out:
        kfree(ht);
        return error;
index 31b199f6efc1d538657b744e1dda7334e54029cc..9aa6af13823c5b75d3e8d2723d281dc79cee7a47 100644 (file)
@@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = {
        [7] = GFS2_DIF_NOATIME,
        [12] = GFS2_DIF_EXHASH,
        [14] = GFS2_DIF_INHERIT_JDATA,
+       [17] = GFS2_DIF_TOPDIR,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = {
        [gfs2fl_AppendOnly] = FS_APPEND_FL,
        [gfs2fl_NoAtime] = FS_NOATIME_FL,
        [gfs2fl_ExHash] = FS_INDEX_FL,
+       [gfs2fl_TopLevel] = FS_TOPDIR_FL,
        [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
 
@@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode)
                             GFS2_DIF_NOATIME|                  \
                             GFS2_DIF_SYNC|                     \
                             GFS2_DIF_SYSTEM|                   \
+                            GFS2_DIF_TOPDIR|                   \
                             GFS2_DIF_INHERIT_JDATA)
 
 /**
@@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 
        gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
        if (!S_ISDIR(inode->i_mode)) {
+               gfsflags &= ~GFS2_DIF_TOPDIR;
                if (gfsflags & GFS2_DIF_INHERIT_JDATA)
                        gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
                return do_gfs2_set_flags(filp, gfsflags, ~0);
@@ -366,7 +370,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 pos = page->index << PAGE_CACHE_SHIFT;
        unsigned int data_blocks, ind_blocks, rblocks;
        struct gfs2_holder gh;
-       struct gfs2_qadata *qa;
        loff_t size;
        int ret;
 
@@ -376,6 +379,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         */
        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
+       ret = gfs2_rs_alloc(ip);
+       if (ret)
+               return ret;
+
+       atomic_set(&ip->i_res->rs_sizehint,
+                  PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
+
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
@@ -393,14 +403,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto out_unlock;
        }
 
-       ret = -ENOMEM;
-       qa = gfs2_qadata_get(ip);
-       if (qa == NULL)
+       ret = gfs2_rindex_update(sdp);
+       if (ret)
                goto out_unlock;
 
        ret = gfs2_quota_lock_check(ip);
        if (ret)
-               goto out_alloc_put;
+               goto out_unlock;
        gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
        ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
        if (ret)
@@ -447,8 +456,6 @@ out_trans_fail:
        gfs2_inplace_release(ip);
 out_quota_unlock:
        gfs2_quota_unlock(ip);
-out_alloc_put:
-       gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&gh);
 out:
@@ -567,16 +574,14 @@ fail:
 
 static int gfs2_release(struct inode *inode, struct file *file)
 {
-       struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-       struct gfs2_file *fp;
+       struct gfs2_inode *ip = GFS2_I(inode);
 
-       fp = file->private_data;
+       kfree(file->private_data);
        file->private_data = NULL;
 
-       if (gfs2_assert_warn(sdp, fp))
-               return -EIO;
-
-       kfree(fp);
+       if ((file->f_mode & FMODE_WRITE) &&
+           (atomic_read(&inode->i_writecount) == 1))
+               gfs2_rs_delete(ip);
 
        return 0;
 }
@@ -653,12 +658,20 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                   unsigned long nr_segs, loff_t pos)
 {
        struct file *file = iocb->ki_filp;
+       size_t writesize = iov_length(iov, nr_segs);
+       struct dentry *dentry = file->f_dentry;
+       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_sbd *sdp;
+       int ret;
 
+       sdp = GFS2_SB(file->f_mapping->host);
+       ret = gfs2_rs_alloc(ip);
+       if (ret)
+               return ret;
+
+       atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
        if (file->f_flags & O_APPEND) {
-               struct dentry *dentry = file->f_dentry;
-               struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
                struct gfs2_holder gh;
-               int ret;
 
                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
                if (ret)
@@ -751,7 +764,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        loff_t bytes, max_bytes;
-       struct gfs2_qadata *qa;
        int error;
        const loff_t pos = offset;
        const loff_t count = len;
@@ -774,11 +786,17 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        if (bytes == 0)
                bytes = sdp->sd_sb.sb_bsize;
 
+       error = gfs2_rs_alloc(ip);
+       if (error)
+               return error;
+
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (unlikely(error))
                goto out_uninit;
 
+       atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
+
        while (len > 0) {
                if (len < bytes)
                        bytes = len;
@@ -787,15 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
                        offset += bytes;
                        continue;
                }
-               qa = gfs2_qadata_get(ip);
-               if (!qa) {
-                       error = -ENOMEM;
-                       goto out_unlock;
-               }
-
                error = gfs2_quota_lock_check(ip);
                if (error)
-                       goto out_alloc_put;
+                       goto out_unlock;
 
 retry:
                gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -835,7 +847,6 @@ retry:
                offset += max_bytes;
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
-               gfs2_qadata_put(ip);
        }
 
        if (error == 0)
@@ -846,8 +857,6 @@ out_trans_fail:
        gfs2_inplace_release(ip);
 out_qunlock:
        gfs2_quota_unlock(ip);
-out_alloc_put:
-       gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&ip->i_gh);
 out_uninit:
index dab2526071cc7b53bc18a9203deea61ab78aaae3..1ed81f40da0dc7c0cf1e71b6da268d5edce07ca7 100644 (file)
 #include "trace_gfs2.h"
 
 struct gfs2_glock_iter {
-       int hash;                       /* hash bucket index         */
-       struct gfs2_sbd *sdp;           /* incore superblock         */
-       struct gfs2_glock *gl;          /* current glock struct      */
-       char string[512];               /* scratch space             */
+       int hash;                       /* hash bucket index           */
+       unsigned nhash;                 /* Index within current bucket */
+       struct gfs2_sbd *sdp;           /* incore superblock           */
+       struct gfs2_glock *gl;          /* current glock struct        */
+       loff_t last_pos;                /* last position               */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -767,6 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
        gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
        gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
        memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+       memset(gl->gl_lvb, 0, 32 * sizeof(char));
        gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
        gl->gl_tchange = jiffies;
        gl->gl_object = NULL;
@@ -948,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
        va_start(args, fmt);
 
        if (seq) {
-               struct gfs2_glock_iter *gi = seq->private;
-               vsprintf(gi->string, fmt, args);
-               seq_printf(seq, gi->string);
+               seq_vprintf(seq, fmt, args);
        } else {
                vaf.fmt = fmt;
                vaf.va = &args;
@@ -1854,8 +1854,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                gl = gi->gl;
                if (gl) {
                        gi->gl = glock_hash_next(gl);
+                       gi->nhash++;
                } else {
+                       if (gi->hash >= GFS2_GL_HASH_SIZE) {
+                               rcu_read_unlock();
+                               return 1;
+                       }
                        gi->gl = glock_hash_chain(gi->hash);
+                       gi->nhash = 0;
                }
                while (gi->gl == NULL) {
                        gi->hash++;
@@ -1864,6 +1870,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                                return 1;
                        }
                        gi->gl = glock_hash_chain(gi->hash);
+                       gi->nhash = 0;
                }
        /* Skip entries for other sb and dead entries */
        } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
@@ -1876,7 +1883,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
        struct gfs2_glock_iter *gi = seq->private;
        loff_t n = *pos;
 
-       gi->hash = 0;
+       if (gi->last_pos <= *pos)
+               n = gi->nhash + (*pos - gi->last_pos);
+       else
+               gi->hash = 0;
+
+       gi->nhash = 0;
        rcu_read_lock();
 
        do {
@@ -1884,6 +1896,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
                        return NULL;
        } while (n--);
 
+       gi->last_pos = *pos;
        return gi->gl;
 }
 
@@ -1893,7 +1906,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
        struct gfs2_glock_iter *gi = seq->private;
 
        (*pos)++;
-
+       gi->last_pos = *pos;
        if (gfs2_glock_iter_next(gi))
                return NULL;
 
@@ -1964,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
        .show  = gfs2_sbstats_seq_show,
 };
 
+#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
+
 static int gfs2_glocks_open(struct inode *inode, struct file *file)
 {
        int ret = seq_open_private(file, &gfs2_glock_seq_ops,
@@ -1972,6 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
                struct seq_file *seq = file->private_data;
                struct gfs2_glock_iter *gi = seq->private;
                gi->sdp = inode->i_private;
+               seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
+               if (seq->buf)
+                       seq->size = GFS2_SEQ_GOODSIZE;
        }
        return ret;
 }
@@ -1984,6 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
                struct seq_file *seq = file->private_data;
                struct gfs2_glock_iter *gi = seq->private;
                gi->sdp = inode->i_private;
+               seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
+               if (seq->buf)
+                       seq->size = GFS2_SEQ_GOODSIZE;
        }
        return ret;
 }
index 67fd6beffeced9b7a91733d52d39ba5e8aec95f5..aaecc8085fc55945009c855eda554079cf100b59 100644 (file)
@@ -84,17 +84,22 @@ struct gfs2_rgrpd {
        u32 rd_data;                    /* num of data blocks in rgrp */
        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        u32 rd_free;
+       u32 rd_reserved;                /* number of blocks reserved */
        u32 rd_free_clone;
        u32 rd_dinodes;
        u64 rd_igeneration;
        struct gfs2_bitmap *rd_bits;
        struct gfs2_sbd *rd_sbd;
+       struct gfs2_rgrp_lvb *rd_rgl;
        u32 rd_last_alloc;
        u32 rd_flags;
 #define GFS2_RDF_CHECK         0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE      0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR         0x40000000 /* error in rg */
 #define GFS2_RDF_MASK          0xf0000000 /* mask for internal flags */
+       spinlock_t rd_rsspin;           /* protects reservation related vars */
+       struct rb_root rd_rstree;       /* multi-block reservation tree */
+       u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
 enum gfs2_state_bits {
@@ -232,6 +237,38 @@ struct gfs2_holder {
        unsigned long gh_ip;
 };
 
+/* Resource group multi-block reservation, in order of appearance:
+
+   Step 1. Function prepares to write, allocates a mb, sets the size hint.
+   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
+   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
+   Step 4. Bits are assigned from the rgrp based on either the reservation
+           or wherever it can.
+*/
+
+struct gfs2_blkreserv {
+       /* components used during write (step 1): */
+       atomic_t rs_sizehint;         /* hint of the write size */
+
+       /* components used during inplace_reserve (step 2): */
+       u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+
+       /* components used during get_local_rgrp (step 3): */
+       struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+       struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
+       struct rb_node rs_node;       /* link to other block reservations */
+
+       /* components used during block searches and assignments (step 4): */
+       struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
+       u32 rs_biblk;                 /* start block relative to the bi */
+       u32 rs_free;                  /* how many blocks are still free */
+
+       /* ancillary quota stuff */
+       struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+       struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+       unsigned int rs_qa_qd_num;
+};
+
 enum {
        GLF_LOCK                        = 1,
        GLF_DEMOTE                      = 3,
@@ -289,18 +326,6 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32   /* Min size of LVB that gfs2 supports */
 
-struct gfs2_qadata { /* quota allocation data */
-       /* Quota stuff */
-       struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
-       struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
-       unsigned int qa_qd_num;
-};
-
-struct gfs2_blkreserv {
-       u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-       struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
-};
-
 enum {
        GIF_INVALID             = 0,
        GIF_QD_LOCKED           = 1,
@@ -308,7 +333,6 @@ enum {
        GIF_SW_PAGED            = 3,
 };
 
-
 struct gfs2_inode {
        struct inode i_inode;
        u64 i_no_addr;
@@ -319,8 +343,7 @@ struct gfs2_inode {
        struct gfs2_glock *i_gl; /* Move into i_gh? */
        struct gfs2_holder i_iopen_gh;
        struct gfs2_holder i_gh; /* for prepare/commit_write only */
-       struct gfs2_qadata *i_qadata; /* quota allocation data */
-       struct gfs2_blkreserv *i_res; /* resource group block reservation */
+       struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
        struct gfs2_rgrpd *i_rgd;
        u64 i_goal;     /* goal block for allocations */
        struct rw_semaphore i_rw_mutex;
@@ -473,6 +496,7 @@ struct gfs2_args {
        unsigned int ar_discard:1;              /* discard requests */
        unsigned int ar_errors:2;               /* errors=withdraw | panic */
        unsigned int ar_nobarrier:1;            /* do not send barriers */
+       unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
        int ar_commit;                          /* Commit interval */
        int ar_statfs_quantum;                  /* The fast statfs interval */
        int ar_quota_quantum;                   /* The quota interval */
index 867674785fcf04909ae03cd5d6f95752fcaeee6d..4ce22e54730806e02ed0ba70f7e7fc847d513142 100644 (file)
@@ -521,12 +521,13 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        int error;
 
        munge_mode_uid_gid(dip, &mode, &uid, &gid);
-       if (!gfs2_qadata_get(dip))
-               return -ENOMEM;
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
 
        error = gfs2_quota_lock(dip, uid, gid);
        if (error)
-               goto out;
+               return error;
 
        error = gfs2_quota_check(dip, uid, gid);
        if (error)
@@ -542,8 +543,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 out_quota:
        gfs2_quota_unlock(dip);
-out:
-       gfs2_qadata_put(dip);
        return error;
 }
 
@@ -551,14 +550,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                       struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-       struct gfs2_qadata *qa;
        int alloc_required;
        struct buffer_head *dibh;
        int error;
 
-       qa = gfs2_qadata_get(dip);
-       if (!qa)
-               return -ENOMEM;
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
 
        error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
@@ -605,13 +603,13 @@ fail_end_trans:
        gfs2_trans_end(sdp);
 
 fail_ipreserv:
-       gfs2_inplace_release(dip);
+       if (alloc_required)
+               gfs2_inplace_release(dip);
 
 fail_quota_locks:
        gfs2_quota_unlock(dip);
 
 fail:
-       gfs2_qadata_put(dip);
        return error;
 }
 
@@ -657,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        const struct qstr *name = &dentry->d_name;
        struct gfs2_holder ghs[2];
        struct inode *inode = NULL;
-       struct gfs2_inode *dip = GFS2_I(dir);
+       struct gfs2_inode *dip = GFS2_I(dir), *ip;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
@@ -667,6 +665,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        if (!name->len || name->len > GFS2_FNAMESIZE)
                return -ENAMETOOLONG;
 
+       /* We need a reservation to allocate the new dinode block. The
+          directory ip temporarily points to the reservation, but this is
+          being done to get a set of contiguous blocks for the new dinode.
+          Since this is a create, we don't have a sizehint yet, so it will
+          have to use the minimum reservation size. */
+       error = gfs2_rs_alloc(dip);
+       if (error)
+               return error;
+
        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        if (error)
                goto fail;
@@ -700,19 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(inode))
                goto fail_gunlock2;
 
-       error = gfs2_inode_refresh(GFS2_I(inode));
+       ip = GFS2_I(inode);
+       error = gfs2_inode_refresh(ip);
        if (error)
                goto fail_gunlock2;
 
+       /* The newly created inode needs a reservation so it can allocate
+          xattrs. At the same time, we want new blocks allocated to the new
+          dinode to be as contiguous as possible. Since we allocated the
+          dinode block under the directory's reservation, we transfer
+          ownership of that reservation to the new inode. The directory
+          doesn't need a reservation unless it needs a new allocation. */
+       ip->i_res = dip->i_res;
+       dip->i_res = NULL;
+
        error = gfs2_acl_create(dip, inode);
        if (error)
                goto fail_gunlock2;
 
-       error = gfs2_security_init(dip, GFS2_I(inode), name);
+       error = gfs2_security_init(dip, ip, name);
        if (error)
                goto fail_gunlock2;
 
-       error = link_dinode(dip, name, GFS2_I(inode));
+       error = link_dinode(dip, name, ip);
        if (error)
                goto fail_gunlock2;
 
@@ -722,10 +739,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        gfs2_trans_end(sdp);
        /* Check if we reserved space in the rgrp. Function link_dinode may
           not, depending on whether alloc is required. */
-       if (dip->i_res)
+       if (gfs2_mb_reserved(dip))
                gfs2_inplace_release(dip);
        gfs2_quota_unlock(dip);
-       gfs2_qadata_put(dip);
        mark_inode_dirty(inode);
        gfs2_glock_dq_uninit_m(2, ghs);
        d_instantiate(dentry, inode);
@@ -740,6 +756,7 @@ fail_gunlock:
                iput(inode);
        }
 fail:
+       gfs2_rs_delete(dip);
        if (bh)
                brelse(bh);
        return error;
@@ -816,6 +833,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (S_ISDIR(inode->i_mode))
                return -EPERM;
 
+       error = gfs2_rs_alloc(dip);
+       if (error)
+               return error;
+
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
@@ -867,16 +888,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        error = 0;
 
        if (alloc_required) {
-               struct gfs2_qadata *qa = gfs2_qadata_get(dip);
-
-               if (!qa) {
-                       error = -ENOMEM;
-                       goto out_gunlock;
-               }
-
                error = gfs2_quota_lock_check(dip);
                if (error)
-                       goto out_alloc;
+                       goto out_gunlock;
 
                error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
                if (error)
@@ -919,9 +933,6 @@ out_ipres:
 out_gunlock_q:
        if (alloc_required)
                gfs2_quota_unlock(dip);
-out_alloc:
-       if (alloc_required)
-               gfs2_qadata_put(dip);
 out_gunlock:
        gfs2_glock_dq(ghs + 1);
 out_child:
@@ -1231,6 +1242,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        if (error)
                return error;
 
+       error = gfs2_rs_alloc(ndip);
+       if (error)
+               return error;
+
        if (odip != ndip) {
                error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
                                           0, &r_gh);
@@ -1354,16 +1369,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                goto out_gunlock;
 
        if (alloc_required) {
-               struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
-
-               if (!qa) {
-                       error = -ENOMEM;
-                       goto out_gunlock;
-               }
-
                error = gfs2_quota_lock_check(ndip);
                if (error)
-                       goto out_alloc;
+                       goto out_gunlock;
 
                error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
                if (error)
@@ -1424,9 +1432,6 @@ out_ipreserv:
 out_gunlock_q:
        if (alloc_required)
                gfs2_quota_unlock(ndip);
-out_alloc:
-       if (alloc_required)
-               gfs2_qadata_put(ndip);
 out_gunlock:
        while (x--) {
                gfs2_glock_dq(ghs + x);
@@ -1587,12 +1592,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
        if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
                ogid = ngid = NO_QUOTA_CHANGE;
 
-       if (!gfs2_qadata_get(ip))
-               return -ENOMEM;
-
        error = gfs2_quota_lock(ip, nuid, ngid);
        if (error)
-               goto out_alloc;
+               return error;
 
        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
                error = gfs2_quota_check(ip, nuid, ngid);
@@ -1618,8 +1620,6 @@ out_end_trans:
        gfs2_trans_end(sdp);
 out_gunlock_q:
        gfs2_quota_unlock(ip);
-out_alloc:
-       gfs2_qadata_put(ip);
        return error;
 }
 
@@ -1641,6 +1641,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
        struct gfs2_holder i_gh;
        int error;
 
+       error = gfs2_rs_alloc(ip);
+       if (error)
+               return error;
+
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
        if (error)
                return error;
index 852c1be1dd3be6ed8d8883182279db820943a2f7..8ff95a2d54ee7dae3de7a9ad6945b5a6c0f17b13 100644 (file)
@@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
                goto out;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-       gfs2_meta_check(sdp, bd->bd_bh);
-       gfs2_pin(sdp, bd->bd_bh);
        mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+       if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+               printk(KERN_ERR
+                      "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
+                      (unsigned long long)bd->bd_bh->b_blocknr);
+               BUG();
+       }
+       gfs2_pin(sdp, bd->bd_bh);
        mh->__pad0 = cpu_to_be64(0);
        mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
        sdp->sd_log_num_buf++;
index 6cdb0f2a1b09d93bd0c64b0d79055b6af0cfe5b8..e04d0e09ee7b59d5959d69e5df872a5871e9ae64 100644 (file)
@@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo)
        inode_init_once(&ip->i_inode);
        init_rwsem(&ip->i_rw_mutex);
        INIT_LIST_HEAD(&ip->i_trunc_list);
-       ip->i_qadata = NULL;
        ip->i_res = NULL;
        ip->i_hash_cache = NULL;
 }
index 6c1e5d1c404a1feb3a714495fc6e34d575d5ad56..3a56c8d94de0607249e6296a18ec28b5ea120e89 100644 (file)
@@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct buffer_head *bh;
 
-       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+       if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+               *bhp = NULL;
                return -EIO;
+       }
 
        *bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
 
@@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
                if (tr && tr->tr_touched)
                        gfs2_io_error_bh(sdp, bh);
                brelse(bh);
+               *bhp = NULL;
                return -EIO;
        }
 
index 6c906078f657024c6f17c663b413a97de643e8b0..e5af9dc420ef0c3f1d9a40034b7a63b5791c04f0 100644 (file)
@@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
        }
 
        error = init_names(sdp, silent);
-       if (error)
-               goto fail;
+       if (error) {
+               /* In this case, we haven't initialized sysfs, so we have to
+                  manually free the sdp. */
+               free_percpu(sdp->sd_lkstats);
+               kfree(sdp);
+               sb->s_fs_info = NULL;
+               return error;
+       }
 
        snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
 
-       gfs2_create_debugfs_file(sdp);
-
        error = gfs2_sys_fs_add(sdp);
+       /*
+        * If we hit an error here, gfs2_sys_fs_add will have called function
+        * kobject_put which causes the sysfs usage count to go to zero, which
+        * causes sysfs to call function gfs2_sbd_release, which frees sdp.
+        * Subsequent error paths here will call gfs2_sys_fs_del, which also
+        * calls kobject_put to free sdp.
+        */
        if (error)
-               goto fail;
+               return error;
+
+       gfs2_create_debugfs_file(sdp);
 
        error = gfs2_lm_mount(sdp, silent);
        if (error)
-               goto fail_sys;
+               goto fail_debug;
 
        error = init_locking(sdp, &mount_gh, DO);
        if (error)
@@ -1215,12 +1228,12 @@ fail_locking:
 fail_lm:
        gfs2_gl_hash_clear(sdp);
        gfs2_lm_unmount(sdp);
-fail_sys:
-       gfs2_sys_fs_del(sdp);
-fail:
+fail_debug:
        gfs2_delete_debugfs_file(sdp);
        free_percpu(sdp->sd_lkstats);
-       kfree(sdp);
+       /* gfs2_sys_fs_del must be the last thing we do, since it causes
+        * sysfs to call function gfs2_sbd_release, which frees sdp. */
+       gfs2_sys_fs_del(sdp);
        sb->s_fs_info = NULL;
        return error;
 }
@@ -1389,10 +1402,9 @@ static void gfs2_kill_sb(struct super_block *sb)
        sdp->sd_root_dir = NULL;
        sdp->sd_master_dir = NULL;
        shrink_dcache_sb(sb);
-       kill_block_super(sb);
        gfs2_delete_debugfs_file(sdp);
        free_percpu(sdp->sd_lkstats);
-       kfree(sdp);
+       kill_block_super(sb);
 }
 
 struct file_system_type gfs2_fs_type = {
index 27b5cc7d688166d4e9a74525d6e1ad00133fe94f..a3bde91645c29489fce1097456955235f2a63a12 100644 (file)
@@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_qadata *qa = ip->i_qadata;
-       struct gfs2_quota_data **qd = qa->qa_qd;
+       struct gfs2_quota_data **qd;
        int error;
 
-       if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
+       if (ip->i_res == NULL)
+               gfs2_rs_alloc(ip);
+
+       qd = ip->i_res->rs_qa_qd;
+
+       if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
            gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
                return -EIO;
 
@@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
        error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
        if (error)
                goto out;
-       qa->qa_qd_num++;
+       ip->i_res->rs_qa_qd_num++;
        qd++;
 
        error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
        if (error)
                goto out;
-       qa->qa_qd_num++;
+       ip->i_res->rs_qa_qd_num++;
        qd++;
 
        if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
                error = qdsb_get(sdp, QUOTA_USER, uid, qd);
                if (error)
                        goto out;
-               qa->qa_qd_num++;
+               ip->i_res->rs_qa_qd_num++;
                qd++;
        }
 
@@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
                error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
                if (error)
                        goto out;
-               qa->qa_qd_num++;
+               ip->i_res->rs_qa_qd_num++;
                qd++;
        }
 
@@ -542,16 +546,17 @@ out:
 void gfs2_quota_unhold(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int x;
 
+       if (ip->i_res == NULL)
+               return;
        gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
 
-       for (x = 0; x < qa->qa_qd_num; x++) {
-               qdsb_put(qa->qa_qd[x]);
-               qa->qa_qd[x] = NULL;
+       for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+               qdsb_put(ip->i_res->rs_qa_qd[x]);
+               ip->i_res->rs_qa_qd[x] = NULL;
        }
-       qa->qa_qd_num = 0;
+       ip->i_res->rs_qa_qd_num = 0;
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -764,6 +769,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        unsigned int nalloc = 0, blocks;
        int error;
 
+       error = gfs2_rs_alloc(ip);
+       if (error)
+               return error;
+
        gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
                              &data_blocks, &ind_blocks);
 
@@ -915,7 +924,6 @@ fail:
 int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
        int error = 0;
@@ -928,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
            sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
 
-       sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
-            sort_qd, NULL);
+       sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
+            sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 
-       for (x = 0; x < qa->qa_qd_num; x++) {
+       for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
                int force = NO_FORCE;
-               qd = qa->qa_qd[x];
+               qd = ip->i_res->rs_qa_qd[x];
                if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
                        force = FORCE;
-               error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
+               error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
                if (error)
                        break;
        }
@@ -945,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
                set_bit(GIF_QD_LOCKED, &ip->i_flags);
        else {
                while (x--)
-                       gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+                       gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
                gfs2_quota_unhold(ip);
        }
 
@@ -990,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd)
 
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
-       struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qda[4];
        unsigned int count = 0;
        unsigned int x;
@@ -998,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
        if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
                goto out;
 
-       for (x = 0; x < qa->qa_qd_num; x++) {
+       for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
                int sync;
 
-               qd = qa->qa_qd[x];
+               qd = ip->i_res->rs_qa_qd[x];
                sync = need_sync(qd);
 
-               gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+               gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
 
                if (sync && qd_trylock(qd))
                        qda[count++] = qd;
@@ -1038,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
 int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        s64 value;
        unsigned int x;
@@ -1050,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
         if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                 return 0;
 
-       for (x = 0; x < qa->qa_qd_num; x++) {
-               qd = qa->qa_qd[x];
+       for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+               qd = ip->i_res->rs_qa_qd[x];
 
                if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1089,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
                       u32 uid, u32 gid)
 {
-       struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
 
@@ -1098,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
                return;
 
-       for (x = 0; x < qa->qa_qd_num; x++) {
-               qd = qa->qa_qd[x];
+       for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+               qd = ip->i_res->rs_qa_qd[x];
 
                if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1549,10 +1554,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        if (error)
                return error;
 
+       error = gfs2_rs_alloc(ip);
+       if (error)
+               goto out_put;
+
        mutex_lock(&ip->i_inode.i_mutex);
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
        if (error)
-               goto out_put;
+               goto out_unlockput;
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
        if (error)
                goto out_q;
@@ -1609,8 +1618,9 @@ out_i:
        gfs2_glock_dq_uninit(&i_gh);
 out_q:
        gfs2_glock_dq_uninit(&q_gh);
-out_put:
+out_unlockput:
        mutex_unlock(&ip->i_inode.i_mutex);
+out_put:
        qd_put(qd);
        return error;
 }
index f74fb9bd1973d318eab91d001fb568cc9018c27b..4d34887a601d966660549b0d0a27517353c1ed5e 100644 (file)
@@ -35,6 +35,9 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#define RSRV_CONTENTION_FACTOR 4
+#define RGRP_RSRV_MAX_CONTENDERS 2
+
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
        return tmp;
 }
 
+/**
+ * rs_cmp - multi-block reservation range compare
+ * @blk: absolute file system block number of the new reservation
+ * @len: number of blocks in the new reservation
+ * @rs: existing reservation to compare against
+ *
+ * returns: 1 if the block range is beyond the reach of the reservation
+ *         -1 if the block range is before the start of the reservation
+ *          0 if the block range overlaps with the reservation
+ */
+static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+{
+       u64 startblk = gfs2_rs_startblk(rs);
+
+       if (blk >= startblk + rs->rs_free)
+               return 1;
+       if (blk + len - 1 < startblk)
+               return -1;
+       return 0;
+}
+
+/**
+ * rs_find - Find a rgrp multi-block reservation that contains a given block
+ * @rgd: The rgrp
+ * @rgblk: The block we're looking for, relative to the rgrp
+ */
+static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
+{
+       struct rb_node **newn;
+       int rc;
+       u64 fsblk = rgblk + rgd->rd_data0;
+
+       spin_lock(&rgd->rd_rsspin);
+       newn = &rgd->rd_rstree.rb_node;
+       while (*newn) {
+               struct gfs2_blkreserv *cur =
+                       rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+               rc = rs_cmp(fsblk, 1, cur);
+               if (rc < 0)
+                       newn = &((*newn)->rb_left);
+               else if (rc > 0)
+                       newn = &((*newn)->rb_right);
+               else {
+                       spin_unlock(&rgd->rd_rsspin);
+                       return cur;
+               }
+       }
+       spin_unlock(&rgd->rd_rsspin);
+       return NULL;
+}
+
 /**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *       a block in a given allocation state.
@@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
        }
 }
 
+/**
+ * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * @ip: the inode for this reservation
+ * Returns: 0 if @ip now has a reservation, -ENOMEM if allocation failed
+ */
+int gfs2_rs_alloc(struct gfs2_inode *ip)
+{
+       struct gfs2_blkreserv *res;
+
+       if (ip->i_res)
+               return 0;
+
+       res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+       if (!res)
+               return -ENOMEM; /* never hand NULL to kmem_cache_free() */
+
+       down_write(&ip->i_rw_mutex);
+       if (ip->i_res) /* set since the unlocked check above; drop ours */
+               kmem_cache_free(gfs2_rsrv_cachep, res);
+       else
+               ip->i_res = res;
+       up_write(&ip->i_rw_mutex);
+       return 0;
+}
+
+static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+{
+       gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
+                      rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
+                      rs->rs_free);
+}
+
+/**
+ * __rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+static void __rs_deltree(struct gfs2_blkreserv *rs)
+{
+       struct gfs2_rgrpd *rgd;
+
+       if (!gfs2_rs_active(rs))
+               return;
+
+       rgd = rs->rs_rgd;
+       /* We can't do this: The reason is that when the rgrp is invalidated,
+          it's in the "middle" of acquiring the glock, but the HOLDER bit
+          isn't set yet:
+          BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
+       trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
+
+       if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
+               rb_erase(&rs->rs_node, &rgd->rd_rstree);
+       BUG_ON(!rgd->rd_rs_cnt);
+       rgd->rd_rs_cnt--;
+
+       if (rs->rs_free) {
+               /* return reserved blocks to the rgrp and the ip */
+               BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
+               rs->rs_rgd->rd_reserved -= rs->rs_free;
+               rs->rs_free = 0;
+               clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+               smp_mb__after_clear_bit();
+       }
+       /* We can't change any of the step 1 or step 2 components of the rs.
+          E.g. We can't set rs_rgd to NULL because the rgd glock is held and
+          dequeued through this pointer.
+          Can't: atomic_set(&rs->rs_sizehint, 0);
+          Can't: rs->rs_requested = 0;
+          Can't: rs->rs_rgd = NULL;*/
+       rs->rs_bi = NULL;
+       rs->rs_biblk = 0;
+}
+
+/**
+ * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+{
+       struct gfs2_rgrpd *rgd;
+
+       if (!gfs2_rs_active(rs))
+               return;
+
+       rgd = rs->rs_rgd;
+       spin_lock(&rgd->rd_rsspin);
+       __rs_deltree(rs);
+       spin_unlock(&rgd->rd_rsspin);
+}
+
+/**
+ * gfs2_rs_delete - delete a multi-block reservation
+ * @ip: The inode for this reservation
+ *
+ */
+void gfs2_rs_delete(struct gfs2_inode *ip)
+{
+       down_write(&ip->i_rw_mutex);
+       if (ip->i_res) {
+               gfs2_rs_deltree(ip->i_res);
+               trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
+               BUG_ON(ip->i_res->rs_free);
+               kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
+               ip->i_res = NULL;
+       }
+       up_write(&ip->i_rw_mutex);
+}
+
+/**
+ * return_all_reservations - return all reserved blocks back to the rgrp.
+ * @rgd: the rgrp that needs its space back
+ *
+ * We previously reserved a bunch of blocks for allocation. Now we need to
+ * give them back. This leaves the reservation structures intact, but removes
+ * all of their corresponding "no-fly zones".
+ */
+static void return_all_reservations(struct gfs2_rgrpd *rgd)
+{
+       struct rb_node *n;
+       struct gfs2_blkreserv *rs;
+
+       spin_lock(&rgd->rd_rsspin);
+       while ((n = rb_first(&rgd->rd_rstree))) {
+               rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+               __rs_deltree(rs);
+       }
+       spin_unlock(&rgd->rd_rsspin);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
        struct rb_node *n;
@@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
+               return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
 }
@@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+       spin_lock_init(&rgd->rd_rsspin);
 
        error = compute_bitstructs(rgd);
        if (error)
@@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
                goto fail;
 
        rgd->rd_gl->gl_object = rgd;
+       rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
        rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
        if (rgd->rd_data > sdp->sd_max_rg_data)
                sdp->sd_max_rg_data = rgd->rd_data;
@@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
+static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
+{
+       struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+       struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+
+       if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
+           rgl->rl_dinodes != str->rg_dinodes ||
+           rgl->rl_igeneration != str->rg_igeneration)
+               return 0;
+       return 1;
+}
+
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+       const struct gfs2_rgrp *str = buf;
+
+       rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+       rgl->rl_flags = str->rg_flags;
+       rgl->rl_free = str->rg_free;
+       rgl->rl_dinodes = str->rg_dinodes;
+       rgl->rl_igeneration = str->rg_igeneration;
+       rgl->__pad = 0UL;
+}
+
+static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
+{
+       struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+       u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
+       rgl->rl_unlinked = cpu_to_be32(unlinked);
+}
+
+static u32 count_unlinked(struct gfs2_rgrpd *rgd)
+{
+       struct gfs2_bitmap *bi;
+       const u32 length = rgd->rd_length;
+       const u8 *buffer = NULL;
+       u32 i, goal, count = 0;
+
+       for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
+               goal = 0;
+               buffer = bi->bi_bh->b_data + bi->bi_offset;
+               WARN_ON(!buffer_uptodate(bi->bi_bh));
+               while (goal < bi->bi_len * GFS2_NBBY) {
+                       goal = gfs2_bitfit(buffer, bi->bi_len, goal,
+                                          GFS2_BLKST_UNLINKED);
+                       if (goal == BFITNOENT)
+                               break;
+                       count++;
+                       goal++;
+               }
+       }
+
+       return count;
+}
+
+
 /**
- * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
- * @gh: The glock holder for the resource group
+ * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
  * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
  * Returns: errno
  */
 
-int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
-       struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
        unsigned int length = rgd->rd_length;
@@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
        unsigned int x, y;
        int error;
 
+       if (rgd->rd_bits[0].bi_bh != NULL)
+               return 0;
+
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
        }
-
+       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+               rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
+               gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
+                                    rgd->rd_bits[0].bi_bh->b_data);
+       }
+       else if (sdp->sd_args.ar_rgrplvb) {
+               if (!gfs2_rgrp_lvb_valid(rgd)){
+                       gfs2_consist_rgrpd(rgd);
+                       error = -EIO;
+                       goto fail;
+               }
+               if (rgd->rd_rgl->rl_unlinked == 0)
+                       rgd->rd_flags &= ~GFS2_RDF_CHECK;
+       }
        return 0;
 
 fail:
@@ -796,6 +1055,39 @@ fail:
        return error;
 }
 
+int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+{
+       u32 rl_flags;
+
+       if (rgd->rd_flags & GFS2_RDF_UPTODATE)
+               return 0;
+
+       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+               return gfs2_rgrp_bh_get(rgd);
+
+       rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
+       rl_flags &= ~GFS2_RDF_MASK;
+       rgd->rd_flags &= GFS2_RDF_MASK;
+       rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+       if (rgd->rd_rgl->rl_unlinked == 0)
+               rgd->rd_flags &= ~GFS2_RDF_CHECK;
+       rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+       rgd->rd_free_clone = rgd->rd_free;
+       rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
+       rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
+       return 0;
+}
+
+int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+{
+       struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
+       struct gfs2_sbd *sdp = rgd->rd_sbd;
+
+       if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
+               return 0;
+       return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
+}
+
 /**
  * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
  * @gh: The glock holder for the resource group
@@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
 
        for (x = 0; x < length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
-               brelse(bi->bi_bh);
-               bi->bi_bh = NULL;
+               if (bi->bi_bh) {
+                       brelse(bi->bi_bh);
+                       bi->bi_bh = NULL;
+               }
        }
 
 }
@@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
                                gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
                                gfs2_rgrp_out(rgd, bh->b_data);
+                               gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
                                gfs2_trans_end(sdp);
                        }
                }
@@ -974,38 +1269,184 @@ out:
 }
 
 /**
- * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
- * @ip: the incore GFS2 inode structure
+ * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
+ * @bi: the bitmap with the blocks
+ * @ip: the inode structure
+ * @biblk: the 32-bit block number relative to the start of the bitmap
+ * @amount: the number of blocks to reserve
  *
- * Returns: the struct gfs2_qadata
+ * Returns: NULL - reservation was already taken, so not inserted
+ *          pointer to the inserted reservation
  */
+static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
+                                      struct gfs2_inode *ip, u32 biblk,
+                                      int amount)
+{
+       struct rb_node **newn, *parent = NULL;
+       int rc;
+       struct gfs2_blkreserv *rs = ip->i_res;
+       struct gfs2_rgrpd *rgd = rs->rs_rgd;
+       u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
 
-struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
+       spin_lock(&rgd->rd_rsspin);
+       newn = &rgd->rd_rstree.rb_node;
+       BUG_ON(!ip->i_res);
+       BUG_ON(gfs2_rs_active(rs));
+       /* Figure out where to put new node */
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+       while (*newn) {
+               struct gfs2_blkreserv *cur =
+                       rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+
+               parent = *newn;
+               rc = rs_cmp(fsblock, amount, cur);
+               if (rc > 0)
+                       newn = &((*newn)->rb_right);
+               else if (rc < 0)
+                       newn = &((*newn)->rb_left);
+               else {
+                       spin_unlock(&rgd->rd_rsspin);
+                       return NULL; /* reservation already in use */
+               }
+       }
+
+       /* Do our reservation work */
+       rs = ip->i_res;
+       rs->rs_free = amount;
+       rs->rs_biblk = biblk;
+       rs->rs_bi = bi;
+       rb_link_node(&rs->rs_node, parent, newn);
+       rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+
+       /* Do our inode accounting for the reservation */
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+
+       /* Do our rgrp accounting for the reservation */
+       rgd->rd_reserved += amount; /* blocks reserved */
+       rgd->rd_rs_cnt++; /* number of in-tree reservations */
+       spin_unlock(&rgd->rd_rsspin);
+       trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+       return rs;
+}
+
+/**
+ * unclaimed_blocks - return number of blocks that aren't spoken for
+ */
+static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
 {
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       int error;
-       BUG_ON(ip->i_qadata != NULL);
-       ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
-       error = gfs2_rindex_update(sdp);
-       if (error)
-               fs_warn(sdp, "rindex update returns %d\n", error);
-       return ip->i_qadata;
+       return rgd->rd_free_clone - rgd->rd_reserved;
 }
 
 /**
- * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
- * @ip: the incore GFS2 inode structure
+ * rg_mblk_search - find a group of multiple free blocks
+ * @rgd: the resource group descriptor
+ * @ip: pointer to the inode for which we're reserving blocks (the
+ *      reservation itself is ip->i_res)
  *
- * Returns: the struct gfs2_qadata
+ * This is very similar to rgblk_search, except we're looking for whole
+ * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
+ * on aligned dwords for speed's sake.
+ *
+ * Returns: 0 if successful or BFITNOENT if there isn't enough free space
  */
 
-static int gfs2_blkrsv_get(struct gfs2_inode *ip)
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-       BUG_ON(ip->i_res != NULL);
-       ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-       if (!ip->i_res)
-               return -ENOMEM;
-       return 0;
+       struct gfs2_bitmap *bi = rgd->rd_bits;
+       const u32 length = rgd->rd_length;
+       u32 blk;
+       unsigned int buf, x, search_bytes;
+       u8 *buffer = NULL;
+       u8 *ptr, *end, *nonzero;
+       u32 goal, rsv_bytes;
+       struct gfs2_blkreserv *rs;
+       u32 best_rs_bytes, unclaimed;
+       int best_rs_blocks;
+
+       /* Find bitmap block that contains bits for goal block */
+       if (rgrp_contains_block(rgd, ip->i_goal))
+               goal = ip->i_goal - rgd->rd_data0;
+       else
+               goal = rgd->rd_last_alloc;
+       for (buf = 0; buf < length; buf++) {
+               bi = rgd->rd_bits + buf;
+               /* Convert scope of "goal" from rgrp-wide to within
+                  found bit block */
+               if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+                       goal -= bi->bi_start * GFS2_NBBY;
+                       goto do_search;
+               }
+       }
+       buf = 0;
+       goal = 0;
+
+do_search:
+       best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
+                              (RGRP_RSRV_MINBLKS * rgd->rd_length));
+       best_rs_bytes = (best_rs_blocks *
+                        (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
+               GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
+       best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
+       unclaimed = unclaimed_blocks(rgd);
+       if (best_rs_bytes * GFS2_NBBY > unclaimed)
+               best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
+
+       for (x = 0; x <= length; x++) {
+               bi = rgd->rd_bits + buf;
+
+               if (test_bit(GBF_FULL, &bi->bi_flags))
+                       goto skip;
+
+               WARN_ON(!buffer_uptodate(bi->bi_bh));
+               if (bi->bi_clone)
+                       buffer = bi->bi_clone + bi->bi_offset;
+               else
+                       buffer = bi->bi_bh->b_data + bi->bi_offset;
+
+               /* We have to keep the reservations aligned on u64 boundaries
+                  otherwise we could get situations where a byte can't be
+                  used because it's after a reservation, but a free bit still
+                  is within the reservation's area. */
+               ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
+               end = (buffer + bi->bi_len);
+               while (ptr < end) {
+                       rsv_bytes = 0;
+                       if ((ptr + best_rs_bytes) <= end)
+                               search_bytes = best_rs_bytes;
+                       else
+                               search_bytes = end - ptr;
+                       BUG_ON(!search_bytes);
+                       nonzero = memchr_inv(ptr, 0, search_bytes);
+                       /* If the lot is all zeroes, reserve the whole size. If
+                          there's enough zeroes to satisfy the request, use
+                          what we can. If there's not enough, keep looking. */
+                       if (nonzero == NULL)
+                               rsv_bytes = search_bytes;
+                       else if ((nonzero - ptr) * GFS2_NBBY >=
+                                ip->i_res->rs_requested)
+                               rsv_bytes = (nonzero - ptr);
+
+                       if (rsv_bytes) {
+                               blk = ((ptr - buffer) * GFS2_NBBY);
+                               BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
+                               rs = rs_insert(bi, ip, blk,
+                                              rsv_bytes * GFS2_NBBY);
+                               if (IS_ERR(rs))
+                                       return PTR_ERR(rs);
+                               if (rs)
+                                       return 0;
+                       }
+                       ptr += ALIGN(search_bytes, sizeof(u64));
+               }
+skip:
+               /* Try next bitmap block (wrap back to rgrp header
+                  if at end) */
+               buf++;
+               buf %= length;
+               goal = 0;
+       }
+
+       return BFITNOENT;
 }
 
 /**
@@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip)
  * @ip: the inode
  *
  * If there's room for the requested blocks to be allocated from the RG:
+ * This will try to get a multi-block reservation first, and if that doesn't
+ * fit, it will take what it can.
  *
  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
-static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-       const struct gfs2_blkreserv *rs = ip->i_res;
+       struct gfs2_blkreserv *rs = ip->i_res;
 
        if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
                return 0;
-       if (rgd->rd_free_clone >= rs->rs_requested)
+       /* Look for a multi-block reservation. */
+       if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
+           rg_mblk_search(rgd, ip) != BFITNOENT)
+               return 1;
+       if (unclaimed_blocks(rgd) >= rs->rs_requested)
                return 1;
-       return 0;
-}
 
-static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
-{
-       return (bi->bi_start * GFS2_NBBY) + blk;
+       return 0;
 }
 
 /**
@@ -1101,119 +1544,120 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 }
 
 /**
- * get_local_rgrp - Choose and lock a rgrp for allocation
+ * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
- * @last_unlinked: the last unlinked block
- *
- * Try to acquire rgrp in way which avoids contending with others.
+ * @requested: the number of blocks to be reserved
  *
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_rgrpd *rgd, *begin = NULL;
+       struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = ip->i_res;
-       int error, rg_locked, flags = LM_FLAG_TRY;
+       int error = 0, rg_locked, flags = LM_FLAG_TRY;
+       u64 last_unlinked = NO_BLOCK;
        int loops = 0;
 
-       if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
-               rgd = begin = ip->i_rgd;
-       else
-               rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
-
-       if (rgd == NULL)
+       if (sdp->sd_args.ar_rgrplvb)
+               flags |= GL_SKIP;
+       rs->rs_requested = requested;
+       if (gfs2_assert_warn(sdp, requested)) {
+               error = -EINVAL;
+               goto out;
+       }
+       if (gfs2_rs_active(rs)) {
+               begin = rs->rs_rgd;
+               flags = 0; /* Yoda: Do or do not. There is no try */
+       } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+               rs->rs_rgd = begin = ip->i_rgd;
+       } else {
+               rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+       }
+       if (rs->rs_rgd == NULL)
                return -EBADSLT;
 
        while (loops < 3) {
                rg_locked = 0;
 
-               if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
+               if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
                        rg_locked = 1;
                        error = 0;
+               } else if (!loops && !gfs2_rs_active(rs) &&
+                          rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+                       /* If the rgrp already is maxed out for contenders,
+                          we can eliminate it as a "first pass" without even
+                          requesting the rgrp glock. */
+                       error = GLR_TRYFAILED;
                } else {
-                       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-                                                  flags, &rs->rs_rgd_gh);
+                       error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+                                                  LM_ST_EXCLUSIVE, flags,
+                                                  &rs->rs_rgd_gh);
+                       if (!error && sdp->sd_args.ar_rgrplvb) {
+                               error = update_rgrp_lvb(rs->rs_rgd);
+                               if (error) {
+                                       gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+                                       return error;
+                               }
+                       }
                }
                switch (error) {
                case 0:
-                       if (try_rgrp_fit(rgd, ip)) {
-                               ip->i_rgd = rgd;
+                       if (gfs2_rs_active(rs)) {
+                               if (unclaimed_blocks(rs->rs_rgd) +
+                                   rs->rs_free >= rs->rs_requested) {
+                                       ip->i_rgd = rs->rs_rgd;
+                                       return 0;
+                               }
+                               /* We have a multi-block reservation, but the
+                                  rgrp doesn't have enough free blocks to
+                                  satisfy the request. Free the reservation
+                                  and look for a suitable rgrp. */
+                               gfs2_rs_deltree(rs);
+                       }
+                       if (try_rgrp_fit(rs->rs_rgd, ip)) {
+                               if (sdp->sd_args.ar_rgrplvb)
+                                       gfs2_rgrp_bh_get(rs->rs_rgd);
+                               ip->i_rgd = rs->rs_rgd;
                                return 0;
                        }
-                       if (rgd->rd_flags & GFS2_RDF_CHECK)
-                               try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+                       if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
+                               if (sdp->sd_args.ar_rgrplvb)
+                                       gfs2_rgrp_bh_get(rs->rs_rgd);
+                               try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
+                                               ip->i_no_addr);
+                       }
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                        /* fall through */
                case GLR_TRYFAILED:
-                       rgd = gfs2_rgrpd_get_next(rgd);
-                       if (rgd == begin) {
-                               flags = 0;
-                               loops++;
-                       }
+                       rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
+                       rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
+                       if (rs->rs_rgd != begin) /* If we didn't wrap */
+                               break;
+
+                       flags &= ~LM_FLAG_TRY;
+                       loops++;
+                       /* Check that fs hasn't grown if writing to rindex */
+                       if (ip == GFS2_I(sdp->sd_rindex) &&
+                           !sdp->sd_rindex_uptodate) {
+                               error = gfs2_ri_update(ip);
+                               if (error)
+                                       goto out;
+                       } else if (loops == 2)
+                               /* Flushing the log may release space */
+                               gfs2_log_flush(sdp, NULL);
                        break;
                default:
-                       return error;
+                       goto out;
                }
        }
-
-       return -ENOSPC;
-}
-
-static void gfs2_blkrsv_put(struct gfs2_inode *ip)
-{
-       BUG_ON(ip->i_res == NULL);
-       kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
-       ip->i_res = NULL;
-}
-
-/**
- * gfs2_inplace_reserve - Reserve space in the filesystem
- * @ip: the inode to reserve space for
- * @requested: the number of blocks to be reserved
- *
- * Returns: errno
- */
-
-int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_blkreserv *rs;
-       int error;
-       u64 last_unlinked = NO_BLOCK;
-       int tries = 0;
-
-       error = gfs2_blkrsv_get(ip);
-       if (error)
-               return error;
-
-       rs = ip->i_res;
-       rs->rs_requested = requested;
-       if (gfs2_assert_warn(sdp, requested)) {
-               error = -EINVAL;
-               goto out;
-       }
-
-       do {
-               error = get_local_rgrp(ip, &last_unlinked);
-               if (error != -ENOSPC)
-                       break;
-               /* Check that fs hasn't grown if writing to rindex */
-               if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
-                       error = gfs2_ri_update(ip);
-                       if (error)
-                               break;
-                       continue;
-               }
-               /* Flushing the log may release space */
-               gfs2_log_flush(sdp, NULL);
-       } while (tries++ < 3);
+       error = -ENOSPC;
 
 out:
        if (error)
-               gfs2_blkrsv_put(ip);
+               rs->rs_requested = 0;
        return error;
 }
 
@@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
        struct gfs2_blkreserv *rs = ip->i_res;
 
+       if (!rs)
+               return;
+
+       if (!rs->rs_free)
+               gfs2_rs_deltree(rs);
+
        if (rs->rs_rgd_gh.gh_gl)
                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-       gfs2_blkrsv_put(ip);
+       rs->rs_requested = 0;
 }
 
 /**
@@ -1326,7 +1776,27 @@ do_search:
                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
                        buffer = bi->bi_clone + bi->bi_offset;
 
-               biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+               while (1) {
+                       struct gfs2_blkreserv *rs;
+                       u32 rgblk;
+
+                       biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+                       if (biblk == BFITNOENT)
+                               break;
+                       /* Check if this block is reserved */
+                       rgblk = gfs2_bi2rgd_blk(bi, biblk);
+                       rs = rs_find(rgd, rgblk);
+                       if (rs == NULL)
+                               break;
+
+                       BUG_ON(rs->rs_bi != bi);
+                       biblk = BFITNOENT;
+                       /* This should jump to the first block after the
+                          reservation. */
+                       goal = rs->rs_biblk + rs->rs_free;
+                       if (goal >= bi->bi_len * GFS2_NBBY)
+                               break;
+               }
                if (biblk != BFITNOENT)
                        break;
 
@@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
                             u32 blk, bool dinode, unsigned int *n)
 {
        const unsigned int elen = *n;
-       u32 goal;
+       u32 goal, rgblk;
        const u8 *buffer = NULL;
+       struct gfs2_blkreserv *rs;
 
        *n = 0;
        buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
                goal++;
                if (goal >= (bi->bi_len * GFS2_NBBY))
                        break;
+               rgblk = gfs2_bi2rgd_blk(bi, goal);
+               rs = rs_find(rgd, rgblk);
+               if (rs) /* Oops, we bumped into someone's reservation */
+                       break;
                if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
                    GFS2_BLKST_FREE)
                        break;
@@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 
 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-       const struct gfs2_rgrpd *rgd = gl->gl_object;
+       struct gfs2_rgrpd *rgd = gl->gl_object;
+       struct gfs2_blkreserv *trs;
+       const struct rb_node *n;
+
        if (rgd == NULL)
                return 0;
-       gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+       gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-                      rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+                      rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+                      rgd->rd_reserved);
+       spin_lock(&rgd->rd_rsspin);
+       for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+               trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+               dump_rs(seq, trs);
+       }
+       spin_unlock(&rgd->rd_rsspin);
        return 0;
 }
 
@@ -1470,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
        rgd->rd_flags |= GFS2_RDF_ERROR;
 }
 
+/**
+ * claim_reserved_blks - Claim previously reserved blocks
+ * @ip: the inode that's claiming the reservation
+ * @dinode: true if the first block claimed is a dinode block; every
+ *          following block is claimed as a data block
+ * @nblocks: desired extent length (value/result)
+ *
+ * Lay claim to blocks from the inode's multi-block reservation, starting
+ * at the reservation's current first block. No more than rs_free blocks
+ * are claimed, so fewer blocks than requested may be returned. When the
+ * reservation is used up it is deleted and, if the rgrp still has enough
+ * unclaimed blocks, rg_mblk_search() is called to look for another one.
+ *
+ * Returns: Starting block number of the blocks claimed.
+ * Sets *nblocks to the actual extent length allocated.
+ */
+static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
+                              unsigned int *nblocks)
+{
+       struct gfs2_blkreserv *rs = ip->i_res;
+       struct gfs2_rgrpd *rgd = rs->rs_rgd;
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       struct gfs2_bitmap *bi;
+       u64 start_block = gfs2_rs_startblk(rs);
+       const unsigned int elen = *nblocks;
+
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+       gfs2_assert_withdraw(sdp, rgd);
+       /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+       bi = rs->rs_bi;
+       gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+
+       for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
+               /* Mark the next reserved block in the bitmap: a dinode
+                  for the first block if requested, otherwise used. */
+               gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
+                           dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+               rs->rs_biblk++;
+               rs->rs_free--;
+
+               BUG_ON(!rgd->rd_reserved);
+               rgd->rd_reserved--;
+               /* Only the first block of the extent can be the dinode */
+               dinode = false;
+               trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+       }
+
+       if (!rs->rs_free) {
+               gfs2_rs_deltree(rs);
+               /* -nblocks because we haven't returned to do the math yet.
+                  I'm doing the math backwards to prevent negative numbers,
+                  but think of it as:
+                  if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS) */
+               if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
+                       rg_mblk_search(rgd, ip);
+       }
+       return start_block;
+}
+
 /**
  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
  * @ip: the inode to allocate the block for
  * @bn: Used to return the starting block number
- * @ndata: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
  * @dinode: 1 if we're allocating a dinode block, else 0
  * @generation: the generation number of the inode
  *
@@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        /* Only happens if there is a bug in gfs2, return something distinctive
         * to ensure that it is noticed.
         */
-       if (ip->i_res == NULL)
+       if (ip->i_res->rs_requested == 0)
                return -ECANCELED;
 
-       rgd = ip->i_rgd;
-
-       if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-               goal = ip->i_goal - rgd->rd_data0;
-       else
-               goal = rgd->rd_last_alloc;
-
-       blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+       /* Check if we have a multi-block reservation, and if so, claim the
+          next free block from it. */
+       if (gfs2_rs_active(ip->i_res)) {
+               BUG_ON(!ip->i_res->rs_free);
+               rgd = ip->i_res->rs_rgd;
+               block = claim_reserved_blks(ip, dinode, nblocks);
+       } else {
+               rgd = ip->i_rgd;
 
-       /* Since all blocks are reserved in advance, this shouldn't happen */
-       if (blk == BFITNOENT)
-               goto rgrp_error;
+               if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
+                       goal = ip->i_goal - rgd->rd_data0;
+               else
+                       goal = rgd->rd_last_alloc;
+
+               blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+
+               /* Since all blocks are reserved in advance, this shouldn't
+                  happen */
+               if (blk == BFITNOENT) {
+                       printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
+                              *nblocks);
+                       printk(KERN_WARNING "FULL=%d\n",
+                              test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+                       goto rgrp_error;
+               }
 
-       block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+               block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+       }
        ndata = *nblocks;
        if (dinode)
                ndata--;
@@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
                        brelse(dibh);
                }
        }
-       if (rgd->rd_free < *nblocks)
+       if (rgd->rd_free < *nblocks) {
+               printk(KERN_WARNING "nblocks=%u\n", *nblocks);
                goto rgrp_error;
+       }
 
        rgd->rd_free -= *nblocks;
        if (dinode) {
@@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
        if (dinode)
@@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
        /* Directories keep their data in the metadata address space */
        if (meta || ip->i_depth)
@@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode)
        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+       update_rgrp_lvb_unlinked(rgd, 1);
 }
 
 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+       update_rgrp_lvb_unlinked(rgd, -1);
 
        gfs2_statfs_change(sdp, 0, +1, -1);
 }
@@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
                for (x = 0; x < rlist->rl_rgrps; x++)
                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
                kfree(rlist->rl_ghs);
+               rlist->rl_ghs = NULL;
        }
 }
 
index b4b10f4de25f2407c7e9e271de98ab2630cf5255..ca6e26729b867ee0f458e1acae042034e2a703dc 100644 (file)
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+/* Since each block in the file system is represented by two bits in the
+ * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
+ * By reserving 32 blocks at a time, we can optimize / shortcut how we search
+ * through the bitmaps by looking a word at a time.
+ */
+#define RGRP_RSRV_MINBYTES 8
+#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
@@ -29,13 +37,7 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
-extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
-static inline void gfs2_qadata_put(struct gfs2_inode *ip)
-{
-       BUG_ON(ip->i_qadata == NULL);
-       kfree(ip->i_qadata);
-       ip->i_qadata = NULL;
-}
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
@@ -43,6 +45,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
                             bool dinode, u64 *generation);
 
+extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -68,4 +73,30 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                                   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
+/* Tell whether the inode is "inplace" reserved: returns 1 when it has a
+ * reservation structure whose rs_requested count is non-zero, else 0.
+ */
+static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
+{
+       const struct gfs2_blkreserv *res = ip->i_res;
+
+       return (res != NULL && res->rs_requested != 0) ? 1 : 0;
+}
+
+/* Tell whether a multi-block reservation is in the rgrp tree: returns 1
+ * when rs is non-NULL and has a bitmap (rs_bi) attached, else 0.
+ */
+static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+{
+       if (rs == NULL)
+               return 0;
+       return rs->rs_bi != NULL ? 1 : 0;
+}
+
+/* Offset a block number within bitmap bi by the bitmap's own start,
+ * i.e. bi_start * GFS2_NBBY + blk.
+ */
+static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
+{
+       const u32 first_blk = bi->bi_start * GFS2_NBBY;
+
+       return first_blk + blk;
+}
+
+/* Block number at which the reservation currently starts: the position
+ * given by (rs_bi, rs_biblk) plus the rgrp's rd_data0.
+ */
+static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
+{
+       const u32 rgblk = gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk);
+
+       return rs->rs_rgd->rd_data0 + rgblk;
+}
+
 #endif /* __RGRP_DOT_H__ */
index f3d6bbfb32c5308e9893fe6c689bf1935af2bc50..fc3168f47a146a978b733a2dd106aaf50706aeb7 100644 (file)
@@ -78,6 +78,8 @@ enum {
        Opt_quota_quantum,
        Opt_barrier,
        Opt_nobarrier,
+       Opt_rgrplvb,
+       Opt_norgrplvb,
        Opt_error,
 };
 
@@ -115,6 +117,8 @@ static const match_table_t tokens = {
        {Opt_quota_quantum, "quota_quantum=%d"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
+       {Opt_rgrplvb, "rgrplvb"},
+       {Opt_norgrplvb, "norgrplvb"},
        {Opt_error, NULL}
 };
 
@@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
                case Opt_nobarrier:
                        args->ar_nobarrier = 1;
                        break;
+               case Opt_rgrplvb:
+                       args->ar_rgrplvb = 1;
+                       break;
+               case Opt_norgrplvb:
+                       args->ar_rgrplvb = 0;
+                       break;
                case Opt_error:
                default:
                        printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
@@ -1381,6 +1391,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
                seq_printf(s, ",nobarrier");
        if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
                seq_printf(s, ",demote_interface_used");
+       if (args->ar_rgrplvb)
+               seq_printf(s, ",rgrplvb");
        return 0;
 }
 
@@ -1401,7 +1413,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_qadata *qa;
        struct gfs2_rgrpd *rgd;
        struct gfs2_holder gh;
        int error;
@@ -1411,13 +1422,13 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                return -EIO;
        }
 
-       qa = gfs2_qadata_get(ip);
-       if (!qa)
-               return -ENOMEM;
+       error = gfs2_rindex_update(sdp);
+       if (error)
+               return error;
 
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-               goto out;
+               return error;
 
        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
        if (!rgd) {
@@ -1445,8 +1456,6 @@ out_rg_gunlock:
        gfs2_glock_dq_uninit(&gh);
 out_qs:
        gfs2_quota_unhold(ip);
-out:
-       gfs2_qadata_put(ip);
        return error;
 }
 
@@ -1547,6 +1556,9 @@ out_truncate:
 
 out_unlock:
        /* Error path for case 1 */
+       if (gfs2_rs_active(ip->i_res))
+               gfs2_rs_deltree(ip->i_res);
+
        if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
                gfs2_glock_dq(&ip->i_iopen_gh);
        gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1556,6 +1568,7 @@ out_unlock:
 out:
        /* Case 3 starts here */
        truncate_inode_pages(&inode->i_data, 0);
+       gfs2_rs_delete(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
        ip->i_gl->gl_object = NULL;
@@ -1578,6 +1591,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
                ip->i_flags = 0;
                ip->i_gl = NULL;
                ip->i_rgd = NULL;
+               ip->i_res = NULL;
        }
        return &ip->i_inode;
 }
index 73ecc34c434280ec0c7343ca0f801ea08c784f6e..8056b7b7238e9ee7770b21fd7cd5e77835c3062c 100644 (file)
@@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = {
        NULL,
 };
 
+/* kobject release method: frees the gfs2_sbd that embeds this kobject */
+static void gfs2_sbd_release(struct kobject *kobj)
+{
+       kfree(container_of(kobj, struct gfs2_sbd, sd_kobj));
+}
+
 static struct kobj_type gfs2_ktype = {
+       .release = gfs2_sbd_release,
        .default_attrs = gfs2_attrs,
        .sysfs_ops     = &gfs2_attr_ops,
 };
@@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
        char ro[20];
        char spectator[20];
        char *envp[] = { ro, spectator, NULL };
+       int sysfs_frees_sdp = 0;
 
        sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
        sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
        error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
                                     "%s", sdp->sd_table_name);
        if (error)
-               goto fail;
+               goto fail_reg;
 
+       sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
+                               function gfs2_sbd_release. */
        error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
        if (error)
                goto fail_reg;
@@ -615,9 +626,13 @@ fail_lock_module:
 fail_tune:
        sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_reg:
-       kobject_put(&sdp->sd_kobj);
-fail:
+       free_percpu(sdp->sd_lkstats);
        fs_err(sdp, "error %d adding sysfs files", error);
+       if (sysfs_frees_sdp)
+               kobject_put(&sdp->sd_kobj);
+       else
+               kfree(sdp);
+       sb->s_fs_info = NULL;
        return error;
 }
 
index 1b8b81588199a28949270131f66df39863a5dfb6..a25c252fe412a9ebe7ad239b67bb9ba41c459e29 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/ktime.h>
 #include "incore.h"
 #include "glock.h"
+#include "rgrp.h"
 
 #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
 #define glock_trace_name(x) __print_symbolic(x,                \
                            { GFS2_BLKST_DINODE, "dinode" },    \
                            { GFS2_BLKST_UNLINKED, "unlinked" })
 
+/* Operation codes passed as the func argument of the gfs2_rs tracepoint */
+#define TRACE_RS_DELETE  0
+#define TRACE_RS_TREEDEL 1
+#define TRACE_RS_INSERT  2
+#define TRACE_RS_CLAIM   3
+
+/* Map a TRACE_RS_* code to the four-character tag printed in the trace.
+ * The named constants are used so the table cannot drift from the codes.
+ */
+#define rs_func_name(x) __print_symbolic(x,    \
+                                        { TRACE_RS_DELETE,  "del " }, \
+                                        { TRACE_RS_TREEDEL, "tdel" }, \
+                                        { TRACE_RS_INSERT,  "ins " }, \
+                                        { TRACE_RS_CLAIM,   "clm " })
+
 #define show_glock_flags(flags) __print_flags(flags, "",       \
        {(1UL << GLF_LOCK),                     "l" },          \
        {(1UL << GLF_DEMOTE),                   "D" },          \
@@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
                __field(        u8,     block_state             )
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_reserved             )
        ),
 
        TP_fast_assign(
@@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
                __entry->block_state    = block_state;
                __entry->rd_addr        = rgd->rd_addr;
                __entry->rd_free_clone  = rgd->rd_free_clone;
+               __entry->rd_reserved    = rgd->rd_reserved;
        ),
 
-       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
+       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long)__entry->len,
                  block_state_name(__entry->block_state),
                  (unsigned long long)__entry->rd_addr,
-                 __entry->rd_free_clone)
+                 __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+);
+
+/* Keep track of multi-block reservations as they are allocated/freed.
+ * ip may be NULL and rs->rs_rgd may be unset; the affected fields are
+ * then recorded as 0.
+ */
+TRACE_EVENT(gfs2_rs,
+
+       TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
+                u8 func),
+
+       TP_ARGS(ip, rs, func),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        u64,    rd_addr                 )
+               __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_reserved             )
+               __field(        u64,    inum                    )
+               __field(        u64,    start                   )
+               __field(        u32,    free                    )
+               __field(        u8,     func                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
+               __entry->rd_addr        = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
+               __entry->rd_free_clone  = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
+               __entry->rd_reserved    = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
+               __entry->inum           = ip ? ip->i_no_addr : 0;
+               /* Only resolve the start block when both the rgrp and the
+                  bitmap pointers are set; gfs2_rs_startblk() dereferences
+                  both of them. */
+               __entry->start          = (rs->rs_rgd && rs->rs_bi) ?
+                                         gfs2_rs_startblk(rs) : 0;
+               __entry->free           = rs->rs_free;
+               __entry->func           = func;
+       ),
+
+       TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
+                 "f:%lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->inum,
+                 (unsigned long long)__entry->start,
+                 (unsigned long long)__entry->rd_addr,
+                 (unsigned long)__entry->rd_free_clone,
+                 (unsigned long)__entry->rd_reserved,
+                 rs_func_name(__entry->func), (unsigned long)__entry->free)
 );
 
 #endif /* _TRACE_GFS2_H */
index 125d4572e1c026dbe31b496ae7aa79f4af8f7b56..41f42cdccbb8e5bb3669bc36a159bcc5f6492e90 100644 (file)
@@ -31,7 +31,7 @@ struct gfs2_glock;
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
 {
        const struct gfs2_blkreserv *rs = ip->i_res;
-       if (rs->rs_requested < ip->i_rgd->rd_length)
+       if (rs && rs->rs_requested < ip->i_rgd->rd_length)
                return rs->rs_requested + 1;
        return ip->i_rgd->rd_length;
 }
index 3586b0dd6aa7914d05f582569e418658e6e7a605..80535739ac7b2c5c1f98ac46e9c2cb4faf5d0eb5 100644 (file)
@@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
                       const char *type, const char *function,
                       char *file, unsigned int line);
 
-static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
-                                   struct buffer_head *bh,
-                                   const char *function,
-                                   char *file, unsigned int line)
+static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
+                                   struct buffer_head *bh)
 {
        struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
        u32 magic = be32_to_cpu(mh->mh_magic);
-       if (unlikely(magic != GFS2_MAGIC))
-               return gfs2_meta_check_ii(sdp, bh, "magic number", function,
-                                         file, line);
+       if (unlikely(magic != GFS2_MAGIC)) {
+               printk(KERN_ERR "GFS2: Magic number missing at %llu\n",
+                      (unsigned long long)bh->b_blocknr);
+               return -EIO;
+       }
        return 0;
 }
 
-#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
-
-
 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
                           u16 type, u16 t,
                           const char *function,
index 927f4df874ae788c744e1e7e51edb19d92b60e36..27a0b4a901f597d6b835051c29ca8632f4a7cf98 100644 (file)
@@ -325,12 +325,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
                               struct gfs2_ea_header *ea,
                               struct gfs2_ea_header *prev, int leave)
 {
-       struct gfs2_qadata *qa;
        int error;
 
-       qa = gfs2_qadata_get(ip);
-       if (!qa)
-               return -ENOMEM;
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
 
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
@@ -340,7 +339,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
        gfs2_quota_unhold(ip);
 out_alloc:
-       gfs2_qadata_put(ip);
        return error;
 }
 
@@ -713,17 +711,16 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                             unsigned int blks,
                             ea_skeleton_call_t skeleton_call, void *private)
 {
-       struct gfs2_qadata *qa;
        struct buffer_head *dibh;
        int error;
 
-       qa = gfs2_qadata_get(ip);
-       if (!qa)
-               return -ENOMEM;
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
 
        error = gfs2_quota_lock_check(ip);
        if (error)
-               goto out;
+               return error;
 
        error = gfs2_inplace_reserve(ip, blks);
        if (error)
@@ -753,8 +750,6 @@ out_ipres:
        gfs2_inplace_release(ip);
 out_gunlock_q:
        gfs2_quota_unlock(ip);
-out:
-       gfs2_qadata_put(ip);
        return error;
 }
 
@@ -1494,16 +1489,15 @@ out_gunlock:
 
 int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
-       struct gfs2_qadata *qa;
        int error;
 
-       qa = gfs2_qadata_get(ip);
-       if (!qa)
-               return -ENOMEM;
+       error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+       if (error)
+               return error;
 
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-               goto out_alloc;
+               return error;
 
        error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
        if (error)
@@ -1519,8 +1513,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 
 out_quota:
        gfs2_quota_unhold(ip);
-out_alloc:
-       gfs2_qadata_put(ip);
        return error;
 }
 
index 0cbd0494b79e0fe13f7b73a9273013f12f98e818..14cf9de1dbe1751645ea1734d20808c9df6c398e 100644 (file)
@@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
 }
 EXPORT_SYMBOL(seq_escape);
 
-int seq_printf(struct seq_file *m, const char *f, ...)
+int seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
-       va_list args;
        int len;
 
        if (m->count < m->size) {
-               va_start(args, f);
                len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
-               va_end(args);
                if (m->count + len < m->size) {
                        m->count += len;
                        return 0;
@@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...)
        seq_set_overflow(m);
        return -1;
 }
+EXPORT_SYMBOL(seq_vprintf);
+
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+       int ret;
+       va_list args;
+
+       va_start(args, f);
+       ret = seq_vprintf(m, f, args);  /* all formatting happens there */
+       va_end(args);
+
+       return ret;                     /* seq_vprintf()'s result, unchanged */
+}
 EXPORT_SYMBOL(seq_printf);
 
 /**
index fa98bdb073b96e79c51ca9e0c71bf348d032171a..b2de1f9a88d6b6066c8118009e0ed6b273fbe953 100644 (file)
@@ -170,6 +170,16 @@ struct gfs2_rindex {
 #define GFS2_RGF_NOALLOC       0x00000008
 #define GFS2_RGF_TRIMMED       0x00000010
 
+struct gfs2_rgrp_lvb {                 /* every field is a big-endian __be type */
+       __be32 rl_magic;
+       __be32 rl_flags;
+       __be32 rl_free;
+       __be32 rl_dinodes;
+       __be64 rl_igeneration;          /* follows four __be32s: offset 16 */
+       __be32 rl_unlinked;
+       __be32 __pad;                   /* explicit tail pad: 6*4 + 8 = 32 bytes */
+};
+
 struct gfs2_rgrp {
        struct gfs2_meta_header rg_header;
 
@@ -214,6 +224,7 @@ enum {
        gfs2fl_NoAtime          = 7,
        gfs2fl_Sync             = 8,
        gfs2fl_System           = 9,
+       gfs2fl_TopLevel         = 10,
        gfs2fl_TruncInProg      = 29,
        gfs2fl_InheritDirectio  = 30,
        gfs2fl_InheritJdata     = 31,
@@ -230,8 +241,9 @@ enum {
 #define GFS2_DIF_NOATIME               0x00000080
 #define GFS2_DIF_SYNC                  0x00000100
 #define GFS2_DIF_SYSTEM                        0x00000200 /* New in gfs2 */
+#define GFS2_DIF_TOPDIR                        0x00000400 /* New in gfs2 */
 #define GFS2_DIF_TRUNC_IN_PROG         0x20000000 /* New in gfs2 */
-#define GFS2_DIF_INHERIT_DIRECTIO      0x40000000
+#define GFS2_DIF_INHERIT_DIRECTIO      0x40000000 /* only in gfs1 */
 #define GFS2_DIF_INHERIT_JDATA         0x80000000
 
 struct gfs2_dinode {
index fc61854f62247b9cbaf7bf490d8580f99d54b34e..83c44eefe698ba97a72700b8cac63a37fb558c82 100644 (file)
@@ -86,6 +86,7 @@ int seq_puts(struct seq_file *m, const char *s);
 int seq_write(struct seq_file *seq, const void *data, size_t len);
 
 __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
+__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);
 
 int seq_path(struct seq_file *, const struct path *, const char *);
 int seq_dentry(struct seq_file *, struct dentry *, const char *);