aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>2013-06-17 15:01:28 +0900
committerDaniel Phillips <daniel@tux3.org>2013-06-17 15:01:28 +0900
commit82b74d1e5559f14656db2133ea6b0aae24ba89ad (patch)
treeac85a58f587ac03f909ec0e353327f7d746b0ced
parent3abe658c0f67bc3617932e929266f70220b78f9d (diff)
downloadlinux-tux3-82b74d1e5559f14656db2133ea6b0aae24ba89ad.tar.gz
tux3: Hack, Use own bdi flusher
In the previous patch, we added our own bdi flusher (TUX3_FLUSHER_ASYNC_HACK), but it did not actually flush any data yet. This patch starts handling the kernel's flush requests via our own bdi flusher. FIXME: the delta transition timing may not be efficient, because the delta transition is started only after a flush request arrives. FIXME: we flush all data for any request; we may want to choose the data to flush conditionally. FIXME: we always wait for the commit; this wait is unnecessary. Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r--fs/tux3/commit_flusher.c44
-rw-r--r--fs/tux3/commit_flusher.h2
-rw-r--r--fs/tux3/commit_flusher_hack.c226
-rw-r--r--fs/tux3/super.c15
-rw-r--r--fs/tux3/tux3.h2
5 files changed, 240 insertions, 49 deletions
diff --git a/fs/tux3/commit_flusher.c b/fs/tux3/commit_flusher.c
index 229ac3254d22ed..25d8ac30e0c272 100644
--- a/fs/tux3/commit_flusher.c
+++ b/fs/tux3/commit_flusher.c
@@ -1,7 +1,15 @@
+#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK
#include "tux3.h"
-#if TUX3_FLUSHER != TUX3_FLUSHER_SYNC
+static void __tux3_init_flusher(struct sb *sb)
+{
+#ifdef __KERNEL__
+ /* Disable writeback task to control inode reclaim by dirty flags */
+ vfs_sb(sb)->s_bdi = &noop_backing_dev_info;
+#endif
+}
+#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN
static int flush_delta_work(void *data)
{
struct sb *sb = data;
@@ -37,6 +45,8 @@ int tux3_init_flusher(struct sb *sb)
struct task_struct *task;
char b[BDEVNAME_SIZE];
+ __tux3_init_flusher(sb);
+
bdevname(vfs_sb(sb)->s_bdev, b);
/* FIXME: we should use normal bdi-writeback by changing core */
@@ -57,29 +67,17 @@ void tux3_exit_flusher(struct sb *sb)
}
}
-#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN
-int tux3_setup_flusher(struct sb *sb)
-{
- /* Disable writeback task to control inode reclaim by dirty flags */
- vfs_sb(sb)->s_bdi = &noop_backing_dev_info;
- return 0;
-}
-
-void tux3_cleanup_flusher(struct sb *sb)
-{
-}
-#endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */
-
static void schedule_flush_delta(struct sb *sb)
{
/* Start the flusher for pending delta */
wake_up_process(sb->flush_task);
}
-#else /* TUX3_FLUSHER == TUX3_FLUSHER_SYNC */
+#else /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */
int tux3_init_flusher(struct sb *sb)
{
+ __tux3_init_flusher(sb);
return 0;
}
@@ -87,19 +85,6 @@ void tux3_exit_flusher(struct sb *sb)
{
}
-int tux3_setup_flusher(struct sb *sb)
-{
-#ifdef __KERNEL__
- /* Disable writeback task to control inode reclaim by dirty flags */
- vfs_sb(sb)->s_bdi = &noop_backing_dev_info;
-#endif
- return 0;
-}
-
-void tux3_cleanup_flusher(struct sb *sb)
-{
-}
-
static void schedule_flush_delta(struct sb *sb)
{
/* Wake up waiters for pending marshal delta */
@@ -118,7 +103,7 @@ static int flush_pending_delta(struct sb *sb)
out:
return err;
}
-#endif /* TUX3_FLUSHER == TUX3_FLUSHER_SYNC */
+#endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_OWN */
/* Try delta transition */
static void try_delta_transition(struct sb *sb)
@@ -212,3 +197,4 @@ static int sync_current_delta(struct sb *sb, enum rollup_flags rollup_flag)
return err;
}
+#endif /* TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_HACK */
diff --git a/fs/tux3/commit_flusher.h b/fs/tux3/commit_flusher.h
index 7dbe76beb10ba7..41a204e9f5dc9b 100644
--- a/fs/tux3/commit_flusher.h
+++ b/fs/tux3/commit_flusher.h
@@ -18,7 +18,5 @@ static inline void tux3_start_periodical_flusher(struct sb *sb) { }
int tux3_init_flusher(struct sb *sb);
void tux3_exit_flusher(struct sb *sb);
-int tux3_setup_flusher(struct sb *sb);
-void tux3_cleanup_flusher(struct sb *sb);
#endif /* !TUX3_COMMIT_FLUSHER_H */
diff --git a/fs/tux3/commit_flusher_hack.c b/fs/tux3/commit_flusher_hack.c
index 47e8d318608436..3b2df20899f2a9 100644
--- a/fs/tux3/commit_flusher_hack.c
+++ b/fs/tux3/commit_flusher_hack.c
@@ -80,6 +80,189 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
+/* Do the delta transition until specified delta */
+static int try_delta_transition_until_delta(struct sb *sb, unsigned delta)
+{
+ trace("delta %u, marshal %u, backend_state %lx",
+ delta, sb->marshal_delta, sb->backend_state);
+
+ /* Already delta transition was started for delta */
+ if (delta_after_eq(sb->marshal_delta, delta))
+ return 1;
+
+ if (!test_and_set_bit(TUX3_COMMIT_RUNNING_BIT, &sb->backend_state)) {
+		/* Recheck after grabbing TUX3_COMMIT_RUNNING_BIT */
+ if (delta_after_eq(sb->marshal_delta, delta)) {
+ clear_bit(TUX3_COMMIT_RUNNING_BIT, &sb->backend_state);
+ return 1;
+ }
+
+ delta_transition(sb);
+ }
+
+ return delta_after_eq(sb->marshal_delta, delta);
+}
+
+/* Advance delta transition until specified delta */
+static int wait_for_transition(struct sb *sb, unsigned delta)
+{
+ return wait_event_killable(sb->delta_event_wq,
+ try_delta_transition_until_delta(sb, delta));
+}
+
+static long tux3_wb_writeback(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct sb *sb = container_of(wb->bdi, struct sb, bdi);
+ struct delta_ref *delta_ref;
+ unsigned delta;
+ int err;
+
+ if (!wb_has_dirty_io(wb))
+ return 0;
+
+	/* Get the delta that has to be written */
+ delta_ref = delta_get(sb);
+#ifdef ROLLUP_DEBUG
+ /* NO_ROLLUP and FORCE_ROLLUP are not supported for now */
+ delta_ref->rollup_flag = ALLOW_ROLLUP;
+#endif
+ delta = delta_ref->delta;
+ delta_put(sb, delta_ref);
+
+ /* Make sure the delta transition was done for current delta */
+ err = wait_for_transition(sb, delta);
+ if (err)
+ return err;
+ assert(delta_after_eq(sb->marshal_delta, delta));
+
+	/* Wait until the last referencer of the delta is gone */
+ wait_event(sb->delta_event_wq,
+ test_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state));
+
+ if (test_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state)) {
+ clear_bit(TUX3_COMMIT_PENDING_BIT, &sb->backend_state);
+
+ err = flush_delta(sb);
+ /* FIXME: error handling */
+#if 0
+ /* wb_update_bandwidth() is not exported to module */
+ wb_update_bandwidth(wb, wb_start);
+#endif
+ }
+
+ return 1; /* FIXME: return code */
+}
+
+static bool inode_dirtied_after(struct inode *inode, unsigned long t)
+{
+ bool ret = time_after(inode->dirtied_when, t);
+#ifndef CONFIG_64BIT
+ /*
+ * For inodes being constantly redirtied, dirtied_when can get stuck.
+ * It _appears_ to be in the future, but is actually in distant past.
+ * This test is necessary to prevent such wrapped-around relative times
+ * from permanently stopping the whole bdi writeback.
+ */
+ ret = ret && time_before_eq(inode->dirtied_when, jiffies);
+#endif
+ return ret;
+}
+
+static int tux3_has_old_data(struct bdi_writeback *wb)
+{
+ static unsigned int tux3_dirty_expire_interval = 30 * 100;
+
+ int has_old = 0;
+
+ /*
+	 * We don't flush each inode individually; instead, we flush
+	 * everything once per tux3_dirty_expire_interval.
+	 *
+	 * FIXME: we should pick up only the older inodes?
+ */
+ spin_lock(&wb->list_lock);
+ if (wb_has_dirty_io(wb)) {
+ unsigned long older_than_this = jiffies -
+ msecs_to_jiffies(tux3_dirty_expire_interval * 10);
+ struct inode *inode =
+ list_entry(wb->b_dirty.prev, struct inode, i_wb_list);
+
+ if (!inode_dirtied_after(inode, older_than_this))
+ has_old = 1;
+ }
+ spin_unlock(&wb->list_lock);
+
+ return has_old;
+}
+
+static long tux3_wb_check_old_data_flush(struct bdi_writeback *wb)
+{
+ /* Hack: dirty_expire_interval is not exported to module */
+ unsigned long expired;
+
+ /*
+ * When set to zero, disable periodic writeback
+ */
+ if (!dirty_writeback_interval)
+ return 0;
+
+ expired = wb->last_old_flush +
+ msecs_to_jiffies(dirty_writeback_interval * 10);
+ if (time_before(jiffies, expired))
+ return 0;
+
+ wb->last_old_flush = jiffies;
+
+ if (!tux3_has_old_data(wb)) {
+ /*
+		 * If we are past the interval, return at least 1 to
+		 * avoid running tux3_wb_check_background_flush().
+ */
+ return 1;
+ }
+
+ struct wb_writeback_work work = {
+ .nr_pages = 0,
+ .sync_mode = WB_SYNC_NONE,
+ .for_kupdate = 1,
+ .range_cyclic = 1,
+ .reason = WB_REASON_PERIODIC,
+ };
+
+ return tux3_wb_writeback(wb, &work);
+}
+
+static inline int tux3_over_bground_thresh(struct backing_dev_info *bdi,
+ long wrote)
+{
+ /*
+ * FIXME: Memory pressure functions are not exported to module.
+ *
+	 * So, if we didn't write any data on this wakeup, we assume
+	 * the wakeup was caused by memory pressure.
+ */
+ return !wrote;
+}
+
+static long tux3_wb_check_background_flush(struct bdi_writeback *wb, long wrote)
+{
+ if (tux3_over_bground_thresh(wb->bdi, wrote)) {
+
+ struct wb_writeback_work work = {
+ .nr_pages = LONG_MAX,
+ .sync_mode = WB_SYNC_NONE,
+ .for_background = 1,
+ .range_cyclic = 1,
+ .reason = WB_REASON_BACKGROUND,
+ };
+
+ return tux3_wb_writeback(wb, &work);
+ }
+
+ return 0;
+}
+
static struct wb_writeback_work *
get_next_work_item(struct backing_dev_info *bdi)
{
@@ -117,9 +300,9 @@ static long tux3_do_writeback(struct bdi_writeback *wb, int force_wait)
*/
if (force_wait)
work->sync_mode = WB_SYNC_ALL;
-#if 0
- wrote += wb_writeback(wb, work);
-#endif
+
+ wrote += tux3_wb_writeback(wb, work);
+
/*
* Notify the caller of completion if this is a synchronous
* work item, otherwise just free it.
@@ -130,13 +313,12 @@ static long tux3_do_writeback(struct bdi_writeback *wb, int force_wait)
kfree(work);
}
trace("flush done");
-#if 0
+
/*
* Check for periodic writeback, kupdated() style
*/
- wrote += wb_check_old_data_flush(wb);
- wrote += wb_check_background_flush(wb);
-#endif
+ wrote += tux3_wb_check_old_data_flush(wb);
+ wrote += tux3_wb_check_background_flush(wb, wrote);
clear_bit(BDI_writeback_running, &wb->bdi->state);
return wrote;
@@ -217,7 +399,7 @@ static int tux3_congested_fn(void *congested_data, int bdi_bits)
* Otherwise, writeback will clear dirty, and inode can be reclaimed
* without our control.
*/
-int tux3_setup_flusher(struct sb *sb)
+int tux3_init_flusher(struct sb *sb)
{
struct backing_dev_info *bdi = &sb->bdi;
dev_t dev = vfs_sb(sb)->s_bdev->bd_dev;
@@ -257,7 +439,7 @@ int tux3_setup_flusher(struct sb *sb)
return 0;
}
-void tux3_cleanup_flusher(struct sb *sb)
+void tux3_exit_flusher(struct sb *sb)
{
struct backing_dev_info *bdi = vfs_sb(sb)->s_bdi;
@@ -268,4 +450,30 @@ void tux3_cleanup_flusher(struct sb *sb)
bdi->capabilities &= ~BDI_CAP_NO_WRITEBACK;
bdi_destroy(bdi);
}
+
+static void schedule_flush_delta(struct sb *sb)
+{
+ /* Wake up waiters for pending marshal delta */
+ wake_up_all(&sb->delta_event_wq);
+}
+
+static void try_delta_transition(struct sb *sb)
+{
+#if 0
+ trace("marshal %u, backend_state %lx",
+ sb->marshal_delta, sb->backend_state);
+ sync_inodes_sb(vfs_sb(sb));
+#endif
+}
+
+static int sync_current_delta(struct sb *sb, enum rollup_flags rollup_flag)
+{
+ /* FORCE_ROLLUP is not supported */
+ WARN_ON(rollup_flag == FORCE_ROLLUP);
+ /* This is called only for fsync, so we can take ->s_umount here */
+ down_read(&vfs_sb(sb)->s_umount);
+ sync_inodes_sb(vfs_sb(sb));
+ up_read(&vfs_sb(sb)->s_umount);
+ return 0; /* FIXME: error code */
+}
#endif /* TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK */
diff --git a/fs/tux3/super.c b/fs/tux3/super.c
index 3b3825d261f594..7dc70025318630 100644
--- a/fs/tux3/super.c
+++ b/fs/tux3/super.c
@@ -95,8 +95,6 @@ static void __tux3_put_super(struct sb *sbi)
{
cleanup_dirty_for_umount(sbi);
- tux3_exit_flusher(sbi);
-
/* All forked buffers should be freed here */
free_forked_buffers(sbi, NULL, 1);
@@ -117,7 +115,7 @@ static void __tux3_put_super(struct sb *sbi)
sbi->volmap = NULL;
/* Cleanup flusher after inode was evicted */
- tux3_cleanup_flusher(sbi);
+ tux3_exit_flusher(sbi);
/* FIXME: add more sanity check */
assert(list_empty(&sbi->alloc_inodes));
@@ -164,7 +162,7 @@ struct replay *tux3_init_fs(struct sb *sbi)
int err;
/* Initialize flusher before setup inode */
- err = tux3_setup_flusher(sbi);
+ err = tux3_init_flusher(sbi);
if (err) {
tux3_err(sbi, "failed to initialize flusher");
goto error;
@@ -209,10 +207,6 @@ struct replay *tux3_init_fs(struct sb *sbi)
goto error_inode;
sbi->rootdir = inode;
- err = tux3_init_flusher(sbi);
- if (err)
- goto error;
-
err = replay_stage2(rp);
if (err) {
rp = NULL;
@@ -347,12 +341,14 @@ static void tux3_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, tux3_i_callback);
}
+#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK
static int tux3_sync_fs(struct super_block *sb, int wait)
{
/* FIXME: We should support "wait" parameter. */
trace_on("wait (%u) parameter is unsupported for now", wait);
return force_delta(tux_sb(sb));
}
+#endif
static void tux3_put_super(struct super_block *sb)
{
@@ -393,7 +389,10 @@ static const struct super_operations tux3_super_ops = {
.evict_inode = tux3_evict_inode,
/* FIXME: we have to handle write_inode of sync (e.g. cache pressure) */
// .write_inode = tux3_write_inode,
+#if TUX3_FLUSHER != TUX3_FLUSHER_ASYNC_HACK
+ /* If TUX3_FLUSHER_ASYNC_HACK, normal kernel flush request does all */
.sync_fs = tux3_sync_fs,
+#endif
.put_super = tux3_put_super,
.statfs = tux3_statfs,
};
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 6bcd1f3364706c..94660d22879a55 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -250,7 +250,7 @@ struct sb {
unsigned marshal_delta; /* marshaling delta */
unsigned committed_delta; /* committed delta */
wait_queue_head_t delta_event_wq; /* wait queue for delta event */
-#if TUX3_FLUSHER != TUX3_FLUSHER_SYNC
+#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN
struct task_struct *flush_task; /* work to flush delta */
#endif
#if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_HACK