diff options
author | OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> | 2013-06-17 15:01:28 +0900 |
---|---|---|
committer | Daniel Phillips <daniel@tux3.org> | 2013-06-17 15:01:28 +0900 |
commit | 82b74d1e5559f14656db2133ea6b0aae24ba89ad (patch) | |
tree | ac85a58f587ac03f909ec0e353327f7d746b0ced | |
parent | 3abe658c0f67bc3617932e929266f70220b78f9d (diff) | |
download | linux-tux3-82b74d1e5559f14656db2133ea6b0aae24ba89ad.tar.gz |
tux3: Hack, Use own bdi flusher
In a previous patch, we added our own bdi flusher (TUX3_FLUSHER_ASYNC_HACK).
However, that flusher did not actually flush any data.
This patch starts handling the kernel's flush requests via our own bdi flusher.
FIXME: the timing of the delta transition may not be efficient,
because the delta transition is started only after a flush request
arrives.
FIXME: we flush all data for every request. We may want to select which
data to flush conditionally.
FIXME: we always wait for the commit to complete. This wait is unnecessary.
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r-- | fs/tux3/commit_flusher.c | 44 | ||||
-rw-r--r-- | fs/tux3/commit_flusher.h | 2 | ||||
-rw-r--r-- | fs/tux3/commit_flusher_hack.c | 226 | ||||
-rw-r--r-- | fs/tux3/super.c | 15 | ||||
-rw-r--r-- | fs/tux3/tux3.h | 2 |
5 files changed, 240 insertions, 49 deletions
diff --git a/fs/tux3/commit_flusher.c b/fs/tux3/commit_flusher.c index 229ac3254d22ed..25d8ac30e0c272 100644 --- a/fs/tux3/commit_flusher.c +++ b/fs/tux3/commit_flusher.c @@ -1,7 +1,15 @@ +#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK #include "tux3.h" -#if TUX3_FLUSHER != TUX3_FLUSHER_SYNC +static void __tux3_init_flusher(struct sb *sb) +{ +#ifdef __KERNEL__ + /* Disable writeback task to control inode reclaim by dirty flags */ + vfs_sb(sb)->s_bdi = &noop_backing_dev_info; +#endif +} +#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN static int flush_delta_work(void *data) { struct sb *sb = data; @@ -37,6 +45,8 @@ int tux3_init_flusher(struct sb *sb) struct task_struct *task; char b[BDEVNAME_SIZE]; + __tux3_init_flusher(sb); + bdevname(vfs_sb(sb)->s_bdev, b); /* FIXME: we should use normal bdi-writeback by changing core */ @@ -57,29 +67,17 @@ void tux3_exit_flusher(struct sb *sb) } } -#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN -int tux3_setup_flusher(struct sb *sb) -{ - /* Disable writeback task to control inode reclaim by dirty flags */ - vfs_sb(sb)->s_bdi = &noop_backing_dev_info; - return 0; -} - -void tux3_cleanup_flusher(struct sb *sb) -{ -} -#endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */ - static void schedule_flush_delta(struct sb *sb) { /* Start the flusher for pending delta */ wake_up_process(sb->flush_task); } -#else /* TUX3_FLUSHER == TUX3_FLUSHER_SYNC */ +#else /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */ int tux3_init_flusher(struct sb *sb) { + __tux3_init_flusher(sb); return 0; } @@ -87,19 +85,6 @@ void tux3_exit_flusher(struct sb *sb) { } -int tux3_setup_flusher(struct sb *sb) -{ -#ifdef __KERNEL__ - /* Disable writeback task to control inode reclaim by dirty flags */ - vfs_sb(sb)->s_bdi = &noop_backing_dev_info; -#endif - return 0; -} - -void tux3_cleanup_flusher(struct sb *sb) -{ -} - static void schedule_flush_delta(struct sb *sb) { /* Wake up waiters for pending marshal delta */ @@ -118,7 +103,7 @@ static int flush_pending_delta(struct sb *sb) out: 
return err; } -#endif /* TUX3_FLUSHER == TUX3_FLUSHER_SYNC */ +#endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */ /* Try delta transition */ static void try_delta_transition(struct sb *sb) @@ -212,3 +197,4 @@ static int sync_current_delta(struct sb *sb, enum rollup_flags rollup_flag) return err; } +#endif /* TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_HACK */ diff --git a/fs/tux3/commit_flusher.h b/fs/tux3/commit_flusher.h index 7dbe76beb10ba7..41a204e9f5dc9b 100644 --- a/fs/tux3/commit_flusher.h +++ b/fs/tux3/commit_flusher.h @@ -18,7 +18,5 @@ static inline void tux3_start_periodical_flusher(struct sb *sb) { } int tux3_init_flusher(struct sb *sb); void tux3_exit_flusher(struct sb *sb); -int tux3_setup_flusher(struct sb *sb); -void tux3_cleanup_flusher(struct sb *sb); #endif /* !TUX3_COMMIT_FLUSHER_H */ diff --git a/fs/tux3/commit_flusher_hack.c b/fs/tux3/commit_flusher_hack.c index 47e8d318608436..3b2df20899f2a9 100644 --- a/fs/tux3/commit_flusher_hack.c +++ b/fs/tux3/commit_flusher_hack.c @@ -80,6 +80,189 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; +/* Do the delta transition until specified delta */ +static int try_delta_transition_until_delta(struct sb *sb, unsigned delta) +{ + trace("delta %u, marshal %u, backend_state %lx", + delta, sb->marshal_delta, sb->backend_state); + + /* Already delta transition was started for delta */ + if (delta_after_eq(sb->marshal_delta, delta)) + return 1; + + if (!test_and_set_bit(TUX3_COMMIT_RUNNING_BIT, &sb->backend_state)) { + /* Recheck after grabed TUX3_COMMIT_RUNNING_BIT */ + if (delta_after_eq(sb->marshal_delta, delta)) { + clear_bit(TUX3_COMMIT_RUNNING_BIT, &sb->backend_state); + return 1; + } + + delta_transition(sb); + } + + return delta_after_eq(sb->marshal_delta, delta); +} + +/* Advance delta transition until specified delta */ +static int wait_for_transition(struct sb *sb, unsigned delta) +{ + return wait_event_killable(sb->delta_event_wq, + try_delta_transition_until_delta(sb, 
delta)); +} + +static long tux3_wb_writeback(struct bdi_writeback *wb, + struct wb_writeback_work *work) +{ + struct sb *sb = container_of(wb->bdi, struct sb, bdi); + struct delta_ref *delta_ref; + unsigned delta; + int err; + + if (!wb_has_dirty_io(wb)) + return 0; + + /* Get delta that have to write */ + delta_ref = delta_get(sb); +#ifdef ROLLUP_DEBUG + /* NO_ROLLUP and FORCE_ROLLUP are not supported for now */ + delta_ref->rollup_flag = ALLOW_ROLLUP; +#endif + delta = delta_ref->delta; + delta_put(sb, delta_ref); + + /* Make sure the delta transition was done for current delta */ + err = wait_for_transition(sb, delta); + if (err) + return err; + assert(delta_after_eq(sb->marshal_delta, delta)); + + /* Wait for last referencer of delta was gone */ + wait_event(sb->delta_event_wq, + test_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state)); + + if (test_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state)) { + clear_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state); + + err = flush_delta(sb); + /* FIXME: error handling */ +#if 0 + /* wb_update_bandwidth() is not exported to module */ + wb_update_bandwidth(wb, wb_start); +#endif + } + + return 1; /* FIXME: return code */ +} + +static bool inode_dirtied_after(struct inode *inode, unsigned long t) +{ + bool ret = time_after(inode->dirtied_when, t); +#ifndef CONFIG_64BIT + /* + * For inodes being constantly redirtied, dirtied_when can get stuck. + * It _appears_ to be in the future, but is actually in distant past. + * This test is necessary to prevent such wrapped-around relative times + * from permanently stopping the whole bdi writeback. + */ + ret = ret && time_before_eq(inode->dirtied_when, jiffies); +#endif + return ret; +} + +static int tux3_has_old_data(struct bdi_writeback *wb) +{ + static unsigned int tux3_dirty_expire_interval = 30 * 100; + + int has_old = 0; + + /* + * We don't flush for each inodes. So, we flush all for each + * tux3_dirty_expire_interval. + * + * FIXME: we should pickup only older inodes? 
+ */ + spin_lock(&wb->list_lock); + if (wb_has_dirty_io(wb)) { + unsigned long older_than_this = jiffies - + msecs_to_jiffies(tux3_dirty_expire_interval * 10); + struct inode *inode = + list_entry(wb->b_dirty.prev, struct inode, i_wb_list); + + if (!inode_dirtied_after(inode, older_than_this)) + has_old = 1; + } + spin_unlock(&wb->list_lock); + + return has_old; +} + +static long tux3_wb_check_old_data_flush(struct bdi_writeback *wb) +{ + /* Hack: dirty_expire_interval is not exported to module */ + unsigned long expired; + + /* + * When set to zero, disable periodic writeback + */ + if (!dirty_writeback_interval) + return 0; + + expired = wb->last_old_flush + + msecs_to_jiffies(dirty_writeback_interval * 10); + if (time_before(jiffies, expired)) + return 0; + + wb->last_old_flush = jiffies; + + if (!tux3_has_old_data(wb)) { + /* + * If now after interval, we return 1 at least, to + * avoid to run tux3_wb_check_background_flush(). + */ + return 1; + } + + struct wb_writeback_work work = { + .nr_pages = 0, + .sync_mode = WB_SYNC_NONE, + .for_kupdate = 1, + .range_cyclic = 1, + .reason = WB_REASON_PERIODIC, + }; + + return tux3_wb_writeback(wb, &work); +} + +static inline int tux3_over_bground_thresh(struct backing_dev_info *bdi, + long wrote) +{ + /* + * FIXME: Memory pressure functions are not exported to module. + * + * So, if we didn't wrote any data on this wakeup, we assume + * this wakeup call is from memory pressure. 
+ */ + return !wrote; +} + +static long tux3_wb_check_background_flush(struct bdi_writeback *wb, long wrote) +{ + if (tux3_over_bground_thresh(wb->bdi, wrote)) { + + struct wb_writeback_work work = { + .nr_pages = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .for_background = 1, + .range_cyclic = 1, + .reason = WB_REASON_BACKGROUND, + }; + + return tux3_wb_writeback(wb, &work); + } + + return 0; +} + static struct wb_writeback_work * get_next_work_item(struct backing_dev_info *bdi) { @@ -117,9 +300,9 @@ static long tux3_do_writeback(struct bdi_writeback *wb, int force_wait) */ if (force_wait) work->sync_mode = WB_SYNC_ALL; -#if 0 - wrote += wb_writeback(wb, work); -#endif + + wrote += tux3_wb_writeback(wb, work); + /* * Notify the caller of completion if this is a synchronous * work item, otherwise just free it. @@ -130,13 +313,12 @@ static long tux3_do_writeback(struct bdi_writeback *wb, int force_wait) kfree(work); } trace("flush done"); -#if 0 + /* * Check for periodic writeback, kupdated() style */ - wrote += wb_check_old_data_flush(wb); - wrote += wb_check_background_flush(wb); -#endif + wrote += tux3_wb_check_old_data_flush(wb); + wrote += tux3_wb_check_background_flush(wb, wrote); clear_bit(BDI_writeback_running, &wb->bdi->state); return wrote; @@ -217,7 +399,7 @@ static int tux3_congested_fn(void *congested_data, int bdi_bits) * Otherwise, writeback will clear dirty, and inode can be reclaimed * without our control. 
*/ -int tux3_setup_flusher(struct sb *sb) +int tux3_init_flusher(struct sb *sb) { struct backing_dev_info *bdi = &sb->bdi; dev_t dev = vfs_sb(sb)->s_bdev->bd_dev; @@ -257,7 +439,7 @@ int tux3_setup_flusher(struct sb *sb) return 0; } -void tux3_cleanup_flusher(struct sb *sb) +void tux3_exit_flusher(struct sb *sb) { struct backing_dev_info *bdi = vfs_sb(sb)->s_bdi; @@ -268,4 +450,30 @@ void tux3_cleanup_flusher(struct sb *sb) bdi->capabilities &= ~BDI_CAP_NO_WRITEBACK; bdi_destroy(bdi); } + +static void schedule_flush_delta(struct sb *sb) +{ + /* Wake up waiters for pending marshal delta */ + wake_up_all(&sb->delta_event_wq); +} + +static void try_delta_transition(struct sb *sb) +{ +#if 0 + trace("marshal %u, backend_state %lx", + sb->marshal_delta, sb->backend_state); + sync_inodes_sb(vfs_sb(sb)); +#endif +} + +static int sync_current_delta(struct sb *sb, enum rollup_flags rollup_flag) +{ + /* FORCE_ROLLUP is not supported */ + WARN_ON(rollup_flag == FORCE_ROLLUP); + /* This is called only for fsync, so we can take ->s_umount here */ + down_read(&vfs_sb(sb)->s_umount); + sync_inodes_sb(vfs_sb(sb)); + up_read(&vfs_sb(sb)->s_umount); + return 0; /* FIXME: error code */ +} #endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK */ diff --git a/fs/tux3/super.c b/fs/tux3/super.c index 3b3825d261f594..7dc70025318630 100644 --- a/fs/tux3/super.c +++ b/fs/tux3/super.c @@ -95,8 +95,6 @@ static void __tux3_put_super(struct sb *sbi) { cleanup_dirty_for_umount(sbi); - tux3_exit_flusher(sbi); - /* All forked buffers should be freed here */ free_forked_buffers(sbi, NULL, 1); @@ -117,7 +115,7 @@ static void __tux3_put_super(struct sb *sbi) sbi->volmap = NULL; /* Cleanup flusher after inode was evicted */ - tux3_cleanup_flusher(sbi); + tux3_exit_flusher(sbi); /* FIXME: add more sanity check */ assert(list_empty(&sbi->alloc_inodes)); @@ -164,7 +162,7 @@ struct replay *tux3_init_fs(struct sb *sbi) int err; /* Initialize flusher before setup inode */ - err = tux3_setup_flusher(sbi); + err = 
tux3_init_flusher(sbi); if (err) { tux3_err(sbi, "failed to initialize flusher"); goto error; @@ -209,10 +207,6 @@ struct replay *tux3_init_fs(struct sb *sbi) goto error_inode; sbi->rootdir = inode; - err = tux3_init_flusher(sbi); - if (err) - goto error; - err = replay_stage2(rp); if (err) { rp = NULL; @@ -347,12 +341,14 @@ static void tux3_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, tux3_i_callback); } +#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK static int tux3_sync_fs(struct super_block *sb, int wait) { /* FIXME: We should support "wait" parameter. */ trace_on("wait (%u) parameter is unsupported for now", wait); return force_delta(tux_sb(sb)); } +#endif static void tux3_put_super(struct super_block *sb) { @@ -393,7 +389,10 @@ static const struct super_operations tux3_super_ops = { .evict_inode = tux3_evict_inode, /* FIXME: we have to handle write_inode of sync (e.g. cache pressure) */ // .write_inode = tux3_write_inode, +#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK + /* If TUX3_FLUSHER_ASYNC_HACK, normal kernel flush request does all */ .sync_fs = tux3_sync_fs, +#endif .put_super = tux3_put_super, .statfs = tux3_statfs, }; diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h index 6bcd1f3364706c..94660d22879a55 100644 --- a/fs/tux3/tux3.h +++ b/fs/tux3/tux3.h @@ -250,7 +250,7 @@ struct sb { unsigned marshal_delta; /* marshaling delta */ unsigned committed_delta; /* committed delta */ wait_queue_head_t delta_event_wq; /* wait queue for delta event */ -#if TUX3_FLUSHER != TUX3_FLUSHER_SYNC +#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN struct task_struct *flush_task; /* work to flush delta */ #endif #if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_HACK |