From: Jens Axboe Mount with "mount -o barrier=1" to enable barriers. DESC jbd: barrier fallback on failure EDESC If an attempt to perform a barrier-based write fails because the disk doesn't support barriers, drop a message and disable barriers. DESC Handle async barrier failures EDESC From: Chris Mason Sometimes barriers fail asynchronously (after the submit_bh call). This patch changes both ext3 and reiserfs to handle that without mistaking the barrier failures for io errors. Signed-off-by: Andrew Morton --- 25-akpm/fs/ext3/super.c | 30 +++++++++++++++++++++++------- 25-akpm/fs/jbd/commit.c | 32 ++++++++++++++++++++++++++++++-- 25-akpm/fs/reiserfs/journal.c | 13 +++++++++++++ 25-akpm/include/linux/ext3_fs.h | 1 + 25-akpm/include/linux/jbd.h | 1 + 5 files changed, 68 insertions(+), 9 deletions(-) diff -puN fs/ext3/super.c~ext3-barrier-support fs/ext3/super.c --- 25/fs/ext3/super.c~ext3-barrier-support Mon Aug 16 15:54:34 2004 +++ 25-akpm/fs/ext3/super.c Mon Aug 16 15:54:34 2004 @@ -585,7 +585,7 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_err, + Opt_ignore, Opt_barrier, Opt_err, }; static match_table_t tokens = { @@ -632,6 +632,7 @@ static match_table_t tokens = { {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_ignore, "usrquota"}, + {Opt_barrier, "barrier=%u"}, {Opt_err, NULL} }; @@ -903,6 +904,14 @@ clear_qf_name: case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; + case Opt_barrier: + if (match_int(&args[0], &option)) + return 0; + if (option) + set_opt(sbi->s_mount_opt, BARRIER); + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; case Opt_ignore: break; default: @@ -1615,16 +1624,23 @@ out_fail: * initial mount, once the journal has been initialised but before we've * done any recovery; and again on any subsequent remount. */ -static void ext3_init_journal_params(struct ext3_sb_info *sbi, - journal_t *journal) +static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) { + struct ext3_sb_info *sbi = EXT3_SB(sb); + if (sbi->s_commit_interval) journal->j_commit_interval = sbi->s_commit_interval; /* We could also set up an ext3-specific default for the commit * interval here, but for now we'll just fall back to the jbd * default. */ -} + spin_lock(&journal->j_state_lock); + if (test_opt(sb, BARRIER)) + journal->j_flags |= JFS_BARRIER; + else + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); +} static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum) { @@ -1662,7 +1678,7 @@ static journal_t *ext3_get_journal(struc return NULL; } journal->j_private = sb; - ext3_init_journal_params(EXT3_SB(sb), journal); + ext3_init_journal_params(sb, journal); return journal; } @@ -1747,7 +1763,7 @@ static journal_t *ext3_get_dev_journal(s goto out_journal; } EXT3_SB(sb)->journal_bdev = bdev; - ext3_init_journal_params(EXT3_SB(sb), journal); + ext3_init_journal_params(sb, journal); return journal; out_journal: journal_destroy(journal); @@ -2044,7 +2060,7 @@ int ext3_remount (struct super_block * s es = sbi->s_es; - ext3_init_journal_params(sbi, sbi->s_journal); + ext3_init_journal_params(sb, sbi->s_journal); if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) diff -puN fs/jbd/commit.c~ext3-barrier-support fs/jbd/commit.c --- 25/fs/jbd/commit.c~ext3-barrier-support Mon Aug 16 15:54:34 2004 +++ 25-akpm/fs/jbd/commit.c Mon Aug 16 15:54:44 2004 @@ -639,10 +639,38 @@ wait_for_iobuf: JBUFFER_TRACE(descriptor, "write commit block"); { struct buffer_head *bh = jh2bh(descriptor); + int ret; + int barrier_done = 0; set_buffer_dirty(bh); - sync_dirty_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) + if (journal->j_flags & JFS_BARRIER) { + set_buffer_ordered(bh); + barrier_done = 1; + } + ret = sync_dirty_buffer(bh); + /* is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP && barrier_done) { + char b[BDEVNAME_SIZE]; + + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + clear_buffer_ordered(bh); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + if (unlikely(ret == -EIO)) err = -EIO; put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff -puN include/linux/ext3_fs.h~ext3-barrier-support include/linux/ext3_fs.h --- 25/include/linux/ext3_fs.h~ext3-barrier-support Mon Aug 16 15:54:34 2004 +++ 25-akpm/include/linux/ext3_fs.h Mon Aug 16 15:54:34 2004 @@ -326,6 +326,7 @@ struct ext3_inode { #define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff -puN include/linux/jbd.h~ext3-barrier-support include/linux/jbd.h --- 25/include/linux/jbd.h~ext3-barrier-support Mon Aug 16 15:54:34 2004 +++ 25-akpm/include/linux/jbd.h Mon Aug 16 15:54:34 2004 @@ -840,6 +840,7 @@ struct journal_s #define JFS_ACK_ERR 0x004 /* The errno in the sb has been acked */ #define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JFS_LOADED 0x010 /* The journal superblock has been loaded */ +#define JFS_BARRIER 0x020 /* Use IDE barriers */ /* * Function declarations for the journaling transaction and buffer diff -puN fs/reiserfs/journal.c~ext3-barrier-support fs/reiserfs/journal.c --- 25/fs/reiserfs/journal.c~ext3-barrier-support Mon Aug 16 15:54:44 2004 +++ 25-akpm/fs/reiserfs/journal.c Mon Aug 16 15:54:44 2004 @@ -655,6 +655,17 @@ static int submit_barrier_buffer(struct return submit_bh(WRITE_BARRIER, bh) ; } +static void check_barrier_completion(struct super_block *s, + struct buffer_head *bh) { + if (buffer_eopnotsupp(bh)) { + clear_buffer_eopnotsupp(bh); + disable_barrier(s); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + } +} + #define CHUNK_SIZE 32 struct buffer_chunk { struct buffer_head *bh[CHUNK_SIZE]; @@ -1032,6 +1043,7 @@ static int flush_commit_list(struct supe } else wait_on_buffer(jl->j_commit_bh); + check_barrier_completion(s, jl->j_commit_bh); if (!buffer_uptodate(jl->j_commit_bh)) { reiserfs_panic(s, "journal-615: buffer write failed\n") ; } @@ -1142,6 +1154,7 @@ static int _update_journal_header_block( goto sync; } wait_on_buffer(SB_JOURNAL(p_s_sb)->j_header_bh); + check_barrier_completion(p_s_sb, SB_JOURNAL(p_s_sb)->j_header_bh); } else { sync: set_buffer_dirty(SB_JOURNAL(p_s_sb)->j_header_bh) ; _