Btrfs: check to see if root_list is empty before adding it to dead roots

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587d74f4be48ece72380ee0f4335ad636f97..af1931a5960d9d602688fe8bb89242d752b71c5f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
+static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
+       [TRANS_STATE_RUNNING]           = 0U,
+       [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
+                                          __TRANS_START),
+       [TRANS_STATE_COMMIT_START]      = (__TRANS_USERSPACE |
+                                          __TRANS_START |
+                                          __TRANS_ATTACH),
+       [TRANS_STATE_COMMIT_DOING]      = (__TRANS_USERSPACE |
+                                          __TRANS_START |
+                                          __TRANS_ATTACH |
+                                          __TRANS_JOIN),
+       [TRANS_STATE_UNBLOCKED]         = (__TRANS_USERSPACE |
+                                          __TRANS_START |
+                                          __TRANS_ATTACH |
+                                          __TRANS_JOIN |
+                                          __TRANS_JOIN_NOLOCK),
+       [TRANS_STATE_COMPLETED]         = (__TRANS_USERSPACE |
+                                          __TRANS_START |
+                                          __TRANS_ATTACH |
+                                          __TRANS_JOIN |
+                                          __TRANS_JOIN_NOLOCK),
+};
+
 static void put_transaction(struct btrfs_transaction *transaction)
 {
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(transaction->delayed_refs.root.rb_node);
+               while (!list_empty(&transaction->pending_chunks)) {
+                       struct extent_map *em;
+
+                       em = list_first_entry(&transaction->pending_chunks,
+                                             struct extent_map, list);
+                       list_del_init(&em->list);
+                       free_extent_map(em);
+               }
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
 }
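
The table added above replaces the old trans_no_join flag and the can_join_transaction() special cases: each transaction state carries a bitmask of handle types that may no longer join once that state is reached. Below is a minimal user-space sketch of that gating idea; the enum values and bit assignments are simplified stand-ins for the kernel's TRANS_STATE_* and __TRANS_* definitions, not the real ones.

/*
 * Illustrative user-space sketch only (not kernel code): models how a
 * per-state bitmask table can gate which handle types may still join a
 * transaction.  Constants below are simplified stand-ins.
 */
#include <stdio.h>

enum trans_state {
        TRANS_STATE_RUNNING,
        TRANS_STATE_BLOCKED,
        TRANS_STATE_COMMIT_START,
        TRANS_STATE_COMMIT_DOING,
        TRANS_STATE_UNBLOCKED,
        TRANS_STATE_COMPLETED,
        TRANS_STATE_MAX,
};

/* handle types as single bits, so a state can reject several at once */
#define __TRANS_USERSPACE       (1U << 0)
#define __TRANS_START           (1U << 1)
#define __TRANS_ATTACH          (1U << 2)
#define __TRANS_JOIN            (1U << 3)
#define __TRANS_JOIN_NOLOCK     (1U << 4)

static const unsigned int blocked_types[TRANS_STATE_MAX] = {
        [TRANS_STATE_RUNNING]      = 0U,
        [TRANS_STATE_BLOCKED]      = __TRANS_USERSPACE | __TRANS_START,
        [TRANS_STATE_COMMIT_START] = __TRANS_USERSPACE | __TRANS_START |
                                     __TRANS_ATTACH,
        [TRANS_STATE_COMMIT_DOING] = __TRANS_USERSPACE | __TRANS_START |
                                     __TRANS_ATTACH | __TRANS_JOIN,
        [TRANS_STATE_UNBLOCKED]    = __TRANS_USERSPACE | __TRANS_START |
                                     __TRANS_ATTACH | __TRANS_JOIN |
                                     __TRANS_JOIN_NOLOCK,
        [TRANS_STATE_COMPLETED]    = __TRANS_USERSPACE | __TRANS_START |
                                     __TRANS_ATTACH | __TRANS_JOIN |
                                     __TRANS_JOIN_NOLOCK,
};

/* mirrors the new check in join_transaction(): a single AND against the
 * current state's mask instead of special-casing individual types */
static int may_join(enum trans_state state, unsigned int type)
{
        return !(blocked_types[state] & type);
}

int main(void)
{
        printf("JOIN during COMMIT_START: %d\n",
               may_join(TRANS_STATE_COMMIT_START, __TRANS_JOIN));        /* 1 */
        printf("START during COMMIT_START: %d\n",
               may_join(TRANS_STATE_COMMIT_START, __TRANS_START));       /* 0 */
        printf("JOIN_NOLOCK during COMMIT_DOING: %d\n",
               may_join(TRANS_STATE_COMMIT_DOING, __TRANS_JOIN_NOLOCK)); /* 1 */
        return 0;
}
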
@@ -50,18 +81,35 @@ static noinline void switch_commit_root(struct btrfs_root *root)
        root->commit_root = btrfs_root_node(root);
 }
 
-static inline int can_join_transaction(struct btrfs_transaction *trans,
-                                      int type)
+static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
+                                        unsigned int type)
+{
+       if (type & TRANS_EXTWRITERS)
+               atomic_inc(&trans->num_extwriters);
+}
+
+static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
+                                        unsigned int type)
+{
+       if (type & TRANS_EXTWRITERS)
+               atomic_dec(&trans->num_extwriters);
+}
+
+static inline void extwriter_counter_init(struct btrfs_transaction *trans,
+                                         unsigned int type)
+{
+       atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
+}
+
+static inline int extwriter_counter_read(struct btrfs_transaction *trans)
 {
-       return !(trans->in_commit &&
-                type != TRANS_JOIN &&
-                type != TRANS_JOIN_NOLOCK);
+       return atomic_read(&trans->num_extwriters);
 }
 
 /*
  * either allocate a new transaction or hop into the existing one
  */
-static noinline int join_transaction(struct btrfs_root *root, int type)
+static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
 {
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -74,32 +122,19 @@ loop:
                return -EROFS;
        }
 
-       if (fs_info->trans_no_join) {
-               /* 
-                * If we are JOIN_NOLOCK we're already committing a current
-                * transaction, we just need a handle to deal with something
-                * when committing the transaction, such as inode cache and
-                * space cache. It is a special case.
-                */
-               if (type != TRANS_JOIN_NOLOCK) {
-                       spin_unlock(&fs_info->trans_lock);
-                       return -EBUSY;
-               }
-       }
-
        cur_trans = fs_info->running_transaction;
        if (cur_trans) {
                if (cur_trans->aborted) {
                        spin_unlock(&fs_info->trans_lock);
                        return cur_trans->aborted;
                }
-               if (!can_join_transaction(cur_trans, type)) {
+               if (btrfs_blocked_trans_types[cur_trans->state] & type) {
                        spin_unlock(&fs_info->trans_lock);
                        return -EBUSY;
                }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
-               cur_trans->num_joined++;
+               extwriter_counter_inc(cur_trans, type);
                spin_unlock(&fs_info->trans_lock);
                return 0;
        }
@@ -112,6 +147,12 @@ loop:
        if (type == TRANS_ATTACH)
                return -ENOENT;
 
+       /*
+        * JOIN_NOLOCK only happens during the transaction commit, so
+        * it is impossible that ->running_transaction is NULL
+        */
+       BUG_ON(type == TRANS_JOIN_NOLOCK);
+
        cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
        if (!cur_trans)
                return -ENOMEM;
@@ -120,7 +161,7 @@ loop:
        if (fs_info->running_transaction) {
                /*
                 * someone started a transaction after we unlocked.  Make sure
-                * to redo the trans_no_join checks above
+                * to redo the checks above
                 */
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                goto loop;
@@ -131,17 +172,15 @@ loop:
        }
 
        atomic_set(&cur_trans->num_writers, 1);
-       cur_trans->num_joined = 0;
+       extwriter_counter_init(cur_trans, type);
        init_waitqueue_head(&cur_trans->writer_wait);
        init_waitqueue_head(&cur_trans->commit_wait);
-       cur_trans->in_commit = 0;
-       cur_trans->blocked = 0;
+       cur_trans->state = TRANS_STATE_RUNNING;
        /*
         * One for this trans handle, one so it will live on until we
         * commit the transaction.
         */
        atomic_set(&cur_trans->use_count, 2);
-       cur_trans->commit_done = 0;
        cur_trans->start_time = get_seconds();
 
        cur_trans->delayed_refs.root = RB_ROOT;
@@ -164,7 +203,6 @@ loop:
                        "creating a fresh transaction\n");
        atomic64_set(&fs_info->tree_mod_seq, 0);
 
-       spin_lock_init(&cur_trans->commit_lock);
        spin_lock_init(&cur_trans->delayed_refs.lock);
        atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
        atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
@@ -172,6 +210,7 @@ loop:
 
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
+       INIT_LIST_HEAD(&cur_trans->pending_chunks);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@ -269,6 +308,13 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static inline int is_transaction_blocked(struct btrfs_transaction *trans)
+{
+       return (trans->state >= TRANS_STATE_BLOCKED &&
+               trans->state < TRANS_STATE_UNBLOCKED &&
+               !trans->aborted);
+}
+
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -279,12 +325,13 @@ static void wait_current_trans(struct btrfs_root *root)
 
        spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
-       if (cur_trans && cur_trans->blocked) {
+       if (cur_trans && is_transaction_blocked(cur_trans)) {
                atomic_inc(&cur_trans->use_count);
                spin_unlock(&root->fs_info->trans_lock);
 
                wait_event(root->fs_info->transaction_wait,
-                          !cur_trans->blocked);
+                          cur_trans->state >= TRANS_STATE_UNBLOCKED ||
+                          cur_trans->aborted);
                put_transaction(cur_trans);
        } else {
                spin_unlock(&root->fs_info->trans_lock);
@@ -307,7 +354,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
 }
 
 static struct btrfs_trans_handle *
-start_transaction(struct btrfs_root *root, u64 num_items, int type,
+start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
                  enum btrfs_reserve_flush_enum flush)
 {
        struct btrfs_trans_handle *h;
@@ -320,7 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
                return ERR_PTR(-EROFS);
 
        if (current->journal_info) {
-               WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
+               WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
                WARN_ON(h->use_count > 2);
@@ -366,7 +413,7 @@ again:
         * If we are ATTACH, it means we just want to catch the current
         * transaction and commit it, so we needn't do sb_start_intwrite(). 
         */
-       if (type < TRANS_JOIN_NOLOCK)
+       if (type & __TRANS_FREEZABLE)
                sb_start_intwrite(root->fs_info->sb);
 
        if (may_wait_transaction(root, type))
@@ -408,7 +455,8 @@ again:
        INIT_LIST_HEAD(&h->new_bgs);
 
        smp_mb();
-       if (cur_trans->blocked && may_wait_transaction(root, type)) {
+       if (cur_trans->state >= TRANS_STATE_BLOCKED &&
+           may_wait_transaction(root, type)) {
                btrfs_commit_transaction(h, root);
                goto again;
        }
@@ -429,7 +477,7 @@ got_it:
        return h;
 
 join_fail:
-       if (type < TRANS_JOIN_NOLOCK)
+       if (type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
        kmem_cache_free(btrfs_trans_handle_cachep, h);
 alloc_fail:
@@ -490,7 +538,7 @@ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
 }
 
 /*
- * btrfs_attach_transaction() - catch the running transaction
+ * btrfs_attach_transaction_barrier() - catch the running transaction
  *
  * It is similar to the above function, the differentia is this one
  * will wait for all the inactive transactions until they fully
@@ -512,7 +560,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
 static noinline void wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
 {
-       wait_event(commit->commit_wait, commit->commit_done);
+       wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
 }
 
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -548,8 +596,8 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
                spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
                                            list) {
-                       if (t->in_commit) {
-                               if (t->commit_done)
+                       if (t->state >= TRANS_STATE_COMMIT_START) {
+                               if (t->state == TRANS_STATE_COMPLETED)
                                        break;
                                cur_trans = t;
                                atomic_inc(&cur_trans->use_count);
@@ -576,10 +624,11 @@ void btrfs_throttle(struct btrfs_root *root)
 static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
-       int ret;
+       if (root->fs_info->global_block_rsv.space_info->full &&
+           btrfs_should_throttle_delayed_refs(trans, root))
+               return 1;
 
-       ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
-       return ret ? 1 : 0;
+       return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
 }
 
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
@@ -590,7 +639,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
        int err;
 
        smp_mb();
-       if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
+       if (cur_trans->state >= TRANS_STATE_BLOCKED ||
+           cur_trans->delayed_refs.flushing)
                return 1;
 
        updates = trans->delayed_ref_updates;
@@ -609,7 +659,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
-       int count = 0;
+       unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;
 
@@ -638,17 +688,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);
 
-       while (count < 1) {
-               unsigned long cur = trans->delayed_ref_updates;
+       trans->delayed_ref_updates = 0;
+       if (btrfs_should_throttle_delayed_refs(trans, root)) {
+               cur = max_t(unsigned long, cur, 1);
                trans->delayed_ref_updates = 0;
-               if (cur &&
-                   trans->transaction->delayed_refs.num_heads_ready > 64) {
-                       trans->delayed_ref_updates = 0;
-                       btrfs_run_delayed_refs(trans, root, cur);
-               } else {
-                       break;
-               }
-               count++;
+               btrfs_run_delayed_refs(trans, root, cur);
        }
 
        btrfs_trans_release_metadata(trans, root);
@@ -658,12 +702,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                btrfs_create_pending_block_groups(trans, root);
 
        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
-           should_end_transaction(trans, root)) {
-               trans->transaction->blocked = 1;
-               smp_wmb();
+           should_end_transaction(trans, root) &&
+           ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
+               spin_lock(&info->trans_lock);
+               if (cur_trans->state == TRANS_STATE_RUNNING)
+                       cur_trans->state = TRANS_STATE_BLOCKED;
+               spin_unlock(&info->trans_lock);
        }
 
-       if (lock && cur_trans->blocked && !cur_trans->in_commit) {
+       if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
                if (throttle) {
                        /*
                         * We may race with somebody else here so end up having
@@ -677,12 +724,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                }
        }
 
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
 
        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
        atomic_dec(&cur_trans->num_writers);
+       extwriter_counter_dec(cur_trans, trans->type);
 
        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
@@ -736,9 +784,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
        struct extent_state *cached_state = NULL;
        u64 start = 0;
        u64 end;
-       struct blk_plug plug;
 
-       blk_start_plug(&plug);
        while (!find_first_extent_bit(dirty_pages, start, &start, &end,
                                      mark, &cached_state)) {
                convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
@@ -752,7 +798,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
        }
        if (err)
                werr = err;
-       blk_finish_plug(&plug);
        return werr;
 }
 
@@ -797,8 +842,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 {
        int ret;
        int ret2;
+       struct blk_plug plug;
 
+       blk_start_plug(&plug);
        ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+       blk_finish_plug(&plug);
        ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
 
        if (ret)
@@ -935,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
  * a dirty root struct and adds it into the list of dead roots that need to
  * be deleted
  */
-int btrfs_add_dead_root(struct btrfs_root *root)
+void btrfs_add_dead_root(struct btrfs_root *root)
 {
        spin_lock(&root->fs_info->trans_lock);
-       list_add_tail(&root->root_list, &root->fs_info->dead_roots);
+       if (list_empty(&root->root_list))
+               list_add_tail(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
-       return 0;
 }
 
 /*
@@ -1318,20 +1366,26 @@ static void update_super_roots(struct btrfs_root *root)
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
 {
+       struct btrfs_transaction *trans;
        int ret = 0;
+
        spin_lock(&info->trans_lock);
-       if (info->running_transaction)
-               ret = info->running_transaction->in_commit;
+       trans = info->running_transaction;
+       if (trans)
+               ret = (trans->state >= TRANS_STATE_COMMIT_START);
        spin_unlock(&info->trans_lock);
        return ret;
 }
 
 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 {
+       struct btrfs_transaction *trans;
        int ret = 0;
+
        spin_lock(&info->trans_lock);
-       if (info->running_transaction)
-               ret = info->running_transaction->blocked;
+       trans = info->running_transaction;
+       if (trans)
+               ret = is_transaction_blocked(trans);
        spin_unlock(&info->trans_lock);
        return ret;
 }
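
The two helpers above show the other half of the rework: the separate in_commit, blocked and commit_done fields collapse into a single ordered state, so those questions become comparisons against that state. A small self-contained sketch of the resulting predicates, assuming the same ordering as the kernel's TRANS_STATE_* enum:

/* Illustrative sketch (not kernel code): the old boolean flags expressed
 * as range checks over one ordered state value. */
#include <stdbool.h>
#include <stdio.h>

enum trans_state {
        TRANS_STATE_RUNNING,
        TRANS_STATE_BLOCKED,
        TRANS_STATE_COMMIT_START,
        TRANS_STATE_COMMIT_DOING,
        TRANS_STATE_UNBLOCKED,
        TRANS_STATE_COMPLETED,
};

struct transaction {
        enum trans_state state;
        bool aborted;
};

/* roughly the old trans->in_commit */
static bool in_commit(const struct transaction *t)
{
        return t->state >= TRANS_STATE_COMMIT_START;
}

/* roughly the old trans->blocked; aborted transactions are never
 * reported as blocked, so waiters do not hang on them */
static bool is_blocked(const struct transaction *t)
{
        return t->state >= TRANS_STATE_BLOCKED &&
               t->state < TRANS_STATE_UNBLOCKED && !t->aborted;
}

/* roughly the old trans->commit_done */
static bool commit_done(const struct transaction *t)
{
        return t->state == TRANS_STATE_COMPLETED;
}

int main(void)
{
        struct transaction t = { .state = TRANS_STATE_COMMIT_DOING };

        printf("in_commit=%d blocked=%d done=%d\n",
               in_commit(&t), is_blocked(&t), commit_done(&t)); /* 1 1 0 */
        t.state = TRANS_STATE_UNBLOCKED;
        printf("in_commit=%d blocked=%d done=%d\n",
               in_commit(&t), is_blocked(&t), commit_done(&t)); /* 1 0 0 */
        return 0;
}
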
@@ -1343,7 +1397,9 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 static void wait_current_trans_commit_start(struct btrfs_root *root,
                                            struct btrfs_transaction *trans)
 {
-       wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
+       wait_event(root->fs_info->transaction_blocked_wait,
+                  trans->state >= TRANS_STATE_COMMIT_START ||
+                  trans->aborted);
 }
 
 /*
@@ -1354,7 +1410,8 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
                                         struct btrfs_transaction *trans)
 {
        wait_event(root->fs_info->transaction_wait,
-                  trans->commit_done || (trans->in_commit && !trans->blocked));
+                  trans->state >= TRANS_STATE_UNBLOCKED ||
+                  trans->aborted);
 }
 
 /*
@@ -1450,26 +1507,31 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
 
        spin_lock(&root->fs_info->trans_lock);
 
-       if (list_empty(&cur_trans->list)) {
-               spin_unlock(&root->fs_info->trans_lock);
-               btrfs_end_transaction(trans, root);
-               return;
-       }
+       /*
+        * If the transaction is removed from the list, it means this
+        * transaction has been committed successfully, so it is impossible
+        * to call the cleanup function.
+        */
+       BUG_ON(list_empty(&cur_trans->list));
 
        list_del_init(&cur_trans->list);
        if (cur_trans == root->fs_info->running_transaction) {
-               root->fs_info->trans_no_join = 1;
+               cur_trans->state = TRANS_STATE_COMMIT_DOING;
                spin_unlock(&root->fs_info->trans_lock);
                wait_event(cur_trans->writer_wait,
                           atomic_read(&cur_trans->num_writers) == 1);
 
                spin_lock(&root->fs_info->trans_lock);
-               root->fs_info->running_transaction = NULL;
        }
        spin_unlock(&root->fs_info->trans_lock);
 
        btrfs_cleanup_one_transaction(trans->transaction, root);
 
+       spin_lock(&root->fs_info->trans_lock);
+       if (cur_trans == root->fs_info->running_transaction)
+               root->fs_info->running_transaction = NULL;
+       spin_unlock(&root->fs_info->trans_lock);
+
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
@@ -1481,33 +1543,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
                current->journal_info = NULL;
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
-
-       spin_lock(&root->fs_info->trans_lock);
-       root->fs_info->trans_no_join = 0;
-       spin_unlock(&root->fs_info->trans_lock);
 }
 
 static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root)
 {
-       int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
-       int snap_pending = 0;
        int ret;
 
-       if (!flush_on_commit) {
-               spin_lock(&root->fs_info->trans_lock);
-               if (!list_empty(&trans->transaction->pending_snapshots))
-                       snap_pending = 1;
-               spin_unlock(&root->fs_info->trans_lock);
-       }
-
-       if (flush_on_commit || snap_pending) {
-               ret = btrfs_start_delalloc_inodes(root, 1);
-               if (ret)
-                       return ret;
-               btrfs_wait_ordered_extents(root, 1);
-       }
-
        ret = btrfs_run_delayed_items(trans, root);
        if (ret)
                return ret;
@@ -1531,23 +1573,25 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-/*
- * btrfs_transaction state sequence:
- *    in_commit = 0, blocked = 0  (initial)
- *    in_commit = 1, blocked = 1
- *    blocked = 0
- *    commit_done = 1
- */
+static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+{
+       if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+               return btrfs_start_all_delalloc_inodes(fs_info, 1);
+       return 0;
+}
+
+static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+{
+       if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+               btrfs_wait_all_ordered_extents(fs_info, 1);
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
-       unsigned long joined = 0;
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_transaction *prev_trans = NULL;
-       DEFINE_WAIT(wait);
        int ret;
-       int should_grow = 0;
-       unsigned long now = get_seconds();
 
        ret = btrfs_run_ordered_operations(trans, root, 0);
        if (ret) {
@@ -1586,6 +1630,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         * start sending their work down.
         */
        cur_trans->delayed_refs.flushing = 1;
+       smp_wmb();
 
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);
@@ -1596,9 +1641,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                return ret;
        }
 
-       spin_lock(&cur_trans->commit_lock);
-       if (cur_trans->in_commit) {
-               spin_unlock(&cur_trans->commit_lock);
+       spin_lock(&root->fs_info->trans_lock);
+       if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
+               spin_unlock(&root->fs_info->trans_lock);
                atomic_inc(&cur_trans->use_count);
                ret = btrfs_end_transaction(trans, root);
 
@@ -1609,16 +1654,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                return ret;
        }
 
-       trans->transaction->in_commit = 1;
-       trans->transaction->blocked = 1;
-       spin_unlock(&cur_trans->commit_lock);
+       cur_trans->state = TRANS_STATE_COMMIT_START;
        wake_up(&root->fs_info->transaction_blocked_wait);
 
-       spin_lock(&root->fs_info->trans_lock);
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
-               if (!prev_trans->commit_done) {
+               if (prev_trans->state != TRANS_STATE_COMPLETED) {
                        atomic_inc(&prev_trans->use_count);
                        spin_unlock(&root->fs_info->trans_lock);
 
@@ -1632,42 +1674,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                spin_unlock(&root->fs_info->trans_lock);
        }
 
-       if (!btrfs_test_opt(root, SSD) &&
-           (now < cur_trans->start_time || now - cur_trans->start_time < 1))
-               should_grow = 1;
-
-       do {
-               joined = cur_trans->num_joined;
-
-               WARN_ON(cur_trans != trans->transaction);
-
-               ret = btrfs_flush_all_pending_stuffs(trans, root);
-               if (ret)
-                       goto cleanup_transaction;
+       extwriter_counter_dec(cur_trans, trans->type);
 
-               prepare_to_wait(&cur_trans->writer_wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
+       ret = btrfs_start_delalloc_flush(root->fs_info);
+       if (ret)
+               goto cleanup_transaction;
 
-               if (atomic_read(&cur_trans->num_writers) > 1)
-                       schedule_timeout(MAX_SCHEDULE_TIMEOUT);
-               else if (should_grow)
-                       schedule_timeout(1);
+       ret = btrfs_flush_all_pending_stuffs(trans, root);
+       if (ret)
+               goto cleanup_transaction;
 
-               finish_wait(&cur_trans->writer_wait, &wait);
-       } while (atomic_read(&cur_trans->num_writers) > 1 ||
-                (should_grow && cur_trans->num_joined != joined));
+       wait_event(cur_trans->writer_wait,
+                  extwriter_counter_read(cur_trans) == 0);
 
+       /* some pending stuffs might be added after the previous flush. */
        ret = btrfs_flush_all_pending_stuffs(trans, root);
        if (ret)
                goto cleanup_transaction;
 
+       btrfs_wait_delalloc_flush(root->fs_info);
        /*
         * Ok now we need to make sure to block out any other joins while we
         * commit the transaction.  We could have started a join before setting
-        * no_join so make sure to wait for num_writers to == 1 again.
+        * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
         */
        spin_lock(&root->fs_info->trans_lock);
-       root->fs_info->trans_no_join = 1;
+       cur_trans->state = TRANS_STATE_COMMIT_DOING;
        spin_unlock(&root->fs_info->trans_lock);
        wait_event(cur_trans->writer_wait,
                   atomic_read(&cur_trans->num_writers) == 1);
@@ -1794,10 +1826,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
               sizeof(*root->fs_info->super_copy));
 
-       trans->transaction->blocked = 0;
        spin_lock(&root->fs_info->trans_lock);
+       cur_trans->state = TRANS_STATE_UNBLOCKED;
        root->fs_info->running_transaction = NULL;
-       root->fs_info->trans_no_join = 0;
        spin_unlock(&root->fs_info->trans_lock);
        mutex_unlock(&root->fs_info->reloc_mutex);
 
@@ -1825,10 +1856,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_finish_extent_commit(trans, root);
 
-       cur_trans->commit_done = 1;
-
        root->fs_info->last_trans_committed = cur_trans->transid;
-
+       /*
+        * We needn't acquire the lock here because there is no other task
+        * which can change it.
+        */
+       cur_trans->state = TRANS_STATE_COMPLETED;
        wake_up(&cur_trans->commit_wait);
 
        spin_lock(&root->fs_info->trans_lock);
@@ -1838,7 +1871,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (trans->type & __TRANS_FREEZABLE)
                sb_end_intwrite(root->fs_info->sb);
 
        trace_btrfs_transaction_commit(root);
@@ -1885,11 +1918,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
        int ret;
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       if (fs_info->sb->s_flags & MS_RDONLY) {
-               pr_debug("btrfs: cleaner called for RO fs!\n");
-               return 0;
-       }
-
        spin_lock(&fs_info->trans_lock);
        if (list_empty(&fs_info->dead_roots)) {
                spin_unlock(&fs_info->trans_lock);
@@ -1897,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
        }
        root = list_first_entry(&fs_info->dead_roots,
                        struct btrfs_root, root_list);
-       list_del(&root->root_list);
+       list_del_init(&root->root_list);
        spin_unlock(&fs_info->trans_lock);
 
        pr_debug("btrfs: cleaner removing %llu\n",