diff options
author | OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> | 2014-03-22 04:54:05 +0900 |
---|---|---|
committer | Daniel Phillips <daniel@tux3.org> | 2014-03-22 04:54:05 +0900 |
commit | 9b8da2c37b75acb68de614ce9a0340303db92226 (patch) | |
tree | 2fa4c9e116295dd6626e81a2ef84ba202645dd02 | |
parent | 6c7a4b82b346010288d15f9addf11d7116d777e3 (diff) | |
download | linux-tux3-9b8da2c37b75acb68de614ce9a0340303db92226.tar.gz |
tux3: Optimize deferred inums search
For now, we are using inode hash to find, deferred allocated
inums. But, loop of find_inode() consumes cpu and slow on some
situations.
To fix, this replaces inode hash with simple deferred inums bitmap.
struct tux3_idefer_map is hash table.
struct tux3_idefer_node is bitmap for BITMAP_SIZE bits.
"node" has index and count, and hashed into "map".
With this simple bitmap, we can find free inum (skip deferred inums)
by find_next_zero_bit().
And dbench become about from 50MB/s to 400 MB/s. (For now, it seems to
be fast)
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r-- | fs/tux3/commit.c | 23 | ||||
-rw-r--r-- | fs/tux3/inode.c | 82 | ||||
-rw-r--r-- | fs/tux3/inode_defer.c | 218 | ||||
-rw-r--r-- | fs/tux3/super.c | 13 | ||||
-rw-r--r-- | fs/tux3/tux3.h | 8 |
5 files changed, 284 insertions, 60 deletions
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c index b64e61782bad2..f54a0fc33194e 100644 --- a/fs/tux3/commit.c +++ b/fs/tux3/commit.c @@ -26,7 +26,7 @@ static void schedule_flush_delta(struct sb *sb); #define ALLOW_FRONTEND_MODIFY /* Initialize the lock and list */ -static void init_sb(struct sb *sb) +static int init_sb(struct sb *sb) { int i; @@ -53,6 +53,12 @@ static void init_sb(struct sb *sb) /* Initialize sb_delta_dirty */ for (i = 0; i < ARRAY_SIZE(sb->s_ddc); i++) INIT_LIST_HEAD(&sb->s_ddc[i].dirty_inodes); + + sb->idefer_map = tux3_alloc_idefer_map(); + if (!sb->idefer_map) + return -ENOMEM; + + return 0; } static void setup_roots(struct sb *sb, struct disksuper *super) @@ -115,10 +121,17 @@ static void __setup_sb(struct sb *sb, struct disksuper *super) } /* Initialize and setup sb by on-disk super block */ -void setup_sb(struct sb *sb, struct disksuper *super) +int setup_sb(struct sb *sb, struct disksuper *super) { - init_sb(sb); + int err; + + err = init_sb(sb); + if (err) + return err; + __setup_sb(sb, super); + + return 0; } /* Load on-disk super block, and call setup_sb() with it */ @@ -128,7 +141,9 @@ int load_sb(struct sb *sb) int err; /* At least initialize sb, even if load is failed */ - init_sb(sb); + err = init_sb(sb); + if (err) + return err; err = devio(READ, sb_dev(sb), SB_LOC, super, SB_LEN); if (err) diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c index 70175efbf8a9e..3e3f9d1725330 100644 --- a/fs/tux3/inode.c +++ b/fs/tux3/inode.c @@ -90,6 +90,9 @@ struct inode *tux_new_inode(struct inode *dir, struct tux_iattr *iattr, /* * Deferred ileaf update for inode number allocation */ + +#include "inode_defer.c" + /* must hold itree->btree.lock */ static int is_defer_alloc_inum(struct inode *inode) { @@ -97,41 +100,25 @@ static int is_defer_alloc_inum(struct inode *inode) } /* must hold itree->btree.lock */ -static int find_defer_alloc_inum(struct sb *sb, inum_t inum) -{ - /* - * FIXME: temporary hack. We should replace this by efficient - * one something like bitmap. - */ -#if 0 - struct tux3_inode *tuxnode; - - list_for_each_entry(tuxnode, &sb->alloc_inodes, alloc_list) { - if (tuxnode->inum == inum) - return 1; - } -#else - struct inode *tmp = tux3_ilookup_nowait(sb, inum); - if (tmp) { - iput(tmp); - return 1; - } -#endif - return 0; -} - -/* must hold itree->btree.lock */ -static void add_defer_alloc_inum(struct inode *inode) +static int add_defer_alloc_inum(struct inode *inode) { /* FIXME: need to reserve space (ileaf/bnodes) for this inode? */ struct sb *sb = tux_sb(inode->i_sb); - list_add_tail(&tux_inode(inode)->alloc_list, &sb->alloc_inodes); + struct tux3_inode *tuxnode = tux_inode(inode); + int err = tux3_idefer_add(sb->idefer_map, tuxnode->inum); + if (!err) + list_add_tail(&tuxnode->alloc_list, &sb->alloc_inodes); + return err; } /* must hold itree->btree.lock. FIXME: spinlock is enough? */ void del_defer_alloc_inum(struct inode *inode) { - list_del_init(&tux_inode(inode)->alloc_list); + struct sb *sb = tux_sb(inode->i_sb); + struct tux3_inode *tuxnode = tux_inode(inode); + if (!list_empty(&tuxnode->alloc_list)) + tux3_idefer_del(sb->idefer_map, tuxnode->inum); + list_del_init(&tuxnode->alloc_list); } void cancel_defer_alloc_inum(struct inode *inode) @@ -246,28 +233,18 @@ static int alloc_inum(struct inode *inode, inum_t goal) down_write(&cursor->btree->lock); while (1) { + inum_t orig; + err = find_free_inum(cursor, goal, &goal); if (err) goto error; - /* - * Is this inum already used by deferred inum allocation? - * - * FIXME: Can be nfsd race happened, or fs corruption. - * And we would want to move this outside btree->lock. - */ - if (insert_inode_locked4(inode, goal, tux_test, &goal) >= 0) + /* Skip deferred allocate inums */ + orig = goal; + goal = tux3_idefer_find_free(sb->idefer_map, orig); + if (orig == goal) break; - - /* - * Skip deferred allocate inums. - * - * FIXME: This is inefficient, we should replace with - * better way. - */ - goal++; - while (find_defer_alloc_inum(sb, goal)) - goal++; + /* If goal marked as deferred inums, retry from itree */ } init_btree(&tux_inode(inode)->btree, sb, no_root, dtree_ops()); @@ -276,7 +253,9 @@ static int alloc_inum(struct inode *inode, inum_t goal) tux_set_inum(inode, goal); tux_setup_inode(inode); - add_defer_alloc_inum(inode); + err = add_defer_alloc_inum(inode); + if (err) + goto error; /* * If inum is not reserved area, account it. If inum is @@ -316,25 +295,22 @@ static void tux_assign_inum_failed(struct inode *inode) int tux_assign_inum(struct inode *inode, inum_t goal) { + inum_t inum; int err; err = alloc_inum(inode, goal); if (err) goto error; -#if 0 - /* - * FIXME: temporary hack. We shouldn't insert inode to hash - * in alloc_inum before initializing completely. - */ - inum_t inum = tux_inode(inode)->inum; + + inum = tux_inode(inode)->inum; if (insert_inode_locked4(inode, inum, tux_test, &inum) < 0) { /* Can be nfsd race happened, or fs corruption. */ - tux3_warn(tux_sb(dir->i_sb), "inode insert error: inum %Lx", + tux3_warn(tux_sb(inode->i_sb), "inode insert error: inum %Lx", inum); err = -EIO; goto error; } -#endif + /* The inode was hashed, we can use deferred deletion from here */ /* diff --git a/fs/tux3/inode_defer.c b/fs/tux3/inode_defer.c new file mode 100644 index 0000000000000..2acf9f187c39e --- /dev/null +++ b/fs/tux3/inode_defer.c @@ -0,0 +1,218 @@ +/* + * Deferred inum allocation management. + * + * This uses bitmap to find current in-flight deferred inum + * allocation. For each the node represent BITMAP_SIZE bits, and the + * node is hashed on struct tux3_idefer_map. + */ + +/* Bitmap node hash size */ +#define NODE_HASH_BITS 8 +#define NODE_HASH_SIZE (1 << NODE_HASH_BITS) + +/* Bitmap size in byte and bit */ +#define BITMAP_BYTE_SHIFT 6 +#define BITMAP_BYTE_SIZE (1 << BITMAP_BYTE_SHIFT) +#define BITMAP_SHIFT (BITMAP_BYTE_SHIFT + 3) +#define BITMAP_SIZE (1 << BITMAP_SHIFT) +#define BITMAP_MASK (BITMAP_SIZE - 1) + +struct tux3_idefer_map { + struct hlist_head heads[NODE_HASH_SIZE]; +}; + +struct tux3_idefer_node { + struct hlist_node link; + block_t index; + unsigned count; + unsigned long bitmap[BITMAP_SIZE / sizeof(unsigned long)]; +}; + +static struct kmem_cache *tux3_idefer_node_cachep; + +struct tux3_idefer_map *tux3_alloc_idefer_map(void) +{ + struct tux3_idefer_map *map; + + map = malloc(sizeof(*map)); + if (map) { + int i; + for (i = 0; i < NODE_HASH_SIZE; i++) + INIT_HLIST_HEAD(&map->heads[i]); + } + return map; +} + +void tux3_free_idefer_map(struct tux3_idefer_map *map) +{ + if (map) { + int i; + for (i = 0; i < NODE_HASH_SIZE; i++) + assert(hlist_empty(&map->heads[i])); + free(map); + } +} + +static void tux3_idefer_init_once(void *mem) +{ + struct tux3_idefer_node *node = mem; + + INIT_HLIST_NODE(&node->link); + node->count = 0; + memset(node->bitmap, 0, sizeof(node->bitmap)); +} + +int __init tux3_init_idefer_cache(void) +{ + tux3_idefer_node_cachep = kmem_cache_create("tux3_idefer_node", + sizeof(struct tux3_idefer_node), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + tux3_idefer_init_once); + if (tux3_idefer_node_cachep == NULL) + return -ENOMEM; + return 0; +} + +void tux3_destroy_idefer_cache(void) +{ + kmem_cache_destroy(tux3_idefer_node_cachep); +} + +static struct tux3_idefer_node *idefer_alloc_node(block_t index) +{ + struct tux3_idefer_node *node; + + node = kmem_cache_alloc(tux3_idefer_node_cachep, GFP_NOFS); + if (!node) + return NULL; + + node->index = index; + + return node; +} + +static void idefer_free_node(struct tux3_idefer_node *node) +{ + kmem_cache_free(tux3_idefer_node_cachep, node); +} + +static inline unsigned idefer_hash(block_t index) +{ + return hash_64(index, NODE_HASH_BITS); +} + +static struct tux3_idefer_node * +idefer_find_node(struct hlist_head *head, block_t index, + struct hlist_node **prev) +{ + struct tux3_idefer_node *node; + + if (prev) + *prev = NULL; + + hlist_for_each_entry(node, head, link) { + if (node->index < index) { + if (prev) + *prev = &node->link; + continue; + } + if (node->index == index) + return node; + if (node->index > index) + return NULL; + } + + return NULL; +} + +/* Set a bit for deferred inum */ +static int tux3_idefer_add(struct tux3_idefer_map *map, inum_t inum) +{ + block_t index = inum >> BITMAP_SHIFT; + unsigned offset = inum & BITMAP_MASK; + struct hlist_head *head = map->heads + idefer_hash(index); + struct tux3_idefer_node *node; + struct hlist_node *prev = NULL; + + node = idefer_find_node(head, index, &prev); + if (!node) { + node = idefer_alloc_node(index); + if (!node) + return -ENOMEM; + if (prev) + hlist_add_after(prev, &node->link); + else + hlist_add_head(&node->link, head); + } + + assert(!test_bit(offset, node->bitmap)); + __set_bit(offset, node->bitmap); + node->count++; + + return 0; +} + +/* Clear a bit for deferred inum */ +static void tux3_idefer_del(struct tux3_idefer_map *map, inum_t inum) +{ + block_t index = inum >> BITMAP_SHIFT; + unsigned offset = inum & BITMAP_MASK; + struct hlist_head *head = map->heads + idefer_hash(index); + struct tux3_idefer_node *node; + + node = idefer_find_node(head, index, NULL); + assert(node); + assert(test_bit(offset, node->bitmap)); + __clear_bit(offset, node->bitmap); + node->count--; + + if (node->count == 0) { + hlist_del(&node->link); + idefer_free_node(node); + } +} + +/* Find free inum except deferred inums from specified range */ +static inum_t find_free(struct tux3_idefer_map *map, inum_t inum, inum_t limit) +{ + block_t limit_index = (limit + BITMAP_MASK) >> BITMAP_SHIFT; + block_t index = inum >> BITMAP_SHIFT; + unsigned offset = inum & BITMAP_MASK; + + while (index < limit_index) { + struct hlist_head *head = map->heads + idefer_hash(index); + struct tux3_idefer_node *node; + + node = idefer_find_node(head, index, NULL); + if (!node) + goto found; + + if (node->count != BITMAP_SIZE) { + offset = find_next_zero_bit(node->bitmap, BITMAP_SIZE, + offset); + if (offset < BITMAP_SIZE) + goto found; + } + + index++; + offset = 0; + } + + return TUX_INVALID_INO; + +found: + return (index << BITMAP_SHIFT) + offset; +} + +/* Find free inum except deferred inums */ +static inum_t tux3_idefer_find_free(struct tux3_idefer_map *map, inum_t start) +{ + inum_t inum; + + inum = find_free(map, start, TUXKEY_LIMIT); + if (inum == TUX_INVALID_INO) + inum = find_free(map, TUX_NORMAL_INO, start); + + assert(inum != TUX_INVALID_INO); + return inum; +} diff --git a/fs/tux3/super.c b/fs/tux3/super.c index fb66234920475..140bcd593a43f 100644 --- a/fs/tux3/super.c +++ b/fs/tux3/super.c @@ -128,6 +128,8 @@ static void __tux3_put_super(struct sb *sbi) /* Cleanup flusher after inode was evicted */ tux3_exit_flusher(sbi); + tux3_free_idefer_map(sbi->idefer_map); + sbi->idefer_map = NULL; /* FIXME: add more sanity check */ assert(list_empty(&sbi->alloc_inodes)); assert(link_empty(&sbi->forked_buffers)); @@ -533,6 +535,10 @@ static int __init init_tux3(void) if (err) goto error_hole; + err = tux3_init_idefer_cache(); + if (err) + goto error_idefer; + err = register_filesystem(&tux3_fs_type); if (err) goto error_fs; @@ -540,9 +546,11 @@ static int __init init_tux3(void) return 0; error_fs: - tux3_destroy_inodecache(); -error_hole: + tux3_destroy_idefer_cache(); +error_idefer: tux3_destroy_hole_cache(); +error_hole: + tux3_destroy_inodecache(); error: return err; } @@ -550,6 +558,7 @@ error: static void __exit exit_tux3(void) { unregister_filesystem(&tux3_fs_type); + tux3_destroy_idefer_cache(); tux3_destroy_hole_cache(); tux3_destroy_inodecache(); } diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h index b10dc6fd8189b..a9b9eff8148c7 100644 --- a/fs/tux3/tux3.h +++ b/fs/tux3/tux3.h @@ -232,6 +232,7 @@ struct countmap_pin { struct buffer_head *buffer; }; +struct tux3_idefer_map; /* Tux3-specific sb is a handle for the entire volume state */ struct sb { union { @@ -310,6 +311,7 @@ struct sb { */ spinlock_t countmap_lock; struct countmap_pin countmap_pin; + struct tux3_idefer_map *idefer_map; struct list_head alloc_inodes; /* deferred inum allocation inodes */ spinlock_t forked_buffers_lock; @@ -761,7 +763,7 @@ int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key, unsigned co int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from, tuxkey_t to); /* commit.c */ -void setup_sb(struct sb *sb, struct disksuper *super); +int setup_sb(struct sb *sb, struct disksuper *super); int load_sb(struct sb *sb); int save_sb(struct sb *sb); void tux3_start_backend(struct sb *sb); @@ -846,6 +848,10 @@ struct inode *tux_new_volmap(struct sb *sb); struct inode *tux_new_logmap(struct sb *sb); struct inode *tux_new_inode(struct inode *dir, struct tux_iattr *iattr, dev_t rdev); +struct tux3_idefer_map *tux3_alloc_idefer_map(void); +void tux3_free_idefer_map(struct tux3_idefer_map *map); +int __init tux3_init_idefer_cache(void); +void tux3_destroy_idefer_cache(void); void del_defer_alloc_inum(struct inode *inode); void cancel_defer_alloc_inum(struct inode *inode); int tux_assign_inum(struct inode *inode, inum_t goal); |