about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2023-03-06 02:35:56 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-03-06 04:11:50 -0500
commit 0206d42daf4c4bd3bbcfa15a2bef34319524db49 (patch)
tree a51d233e8ad9e9e730d1582519950f5af10532cd
parent 61b25f2dd21e1abe11572f4065e75c9c4c304599 (diff)
download bcachefs-tools-0206d42daf4c4bd3bbcfa15a2bef34319524db49.tar.gz
Update bcachefs sources to 3856459b1b bcachefs: bch2_btree_iter_peek_node_and_restart()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- Makefile.compiler 4
-rw-r--r-- include/linux/slab.h 9
-rw-r--r-- libbcachefs/acl.c 13
-rw-r--r-- libbcachefs/acl.h 4
-rw-r--r-- libbcachefs/alloc_background.h 17
-rw-r--r-- libbcachefs/alloc_foreground.c 21
-rw-r--r-- libbcachefs/bkey.h 4
-rw-r--r-- libbcachefs/bset.c 4
-rw-r--r-- libbcachefs/bset.h 4
-rw-r--r-- libbcachefs/btree_cache.c 4
-rw-r--r-- libbcachefs/btree_cache.h 4
-rw-r--r-- libbcachefs/btree_io.c 8
-rw-r--r-- libbcachefs/btree_iter.c 11
-rw-r--r-- libbcachefs/btree_iter.h 15
-rw-r--r-- libbcachefs/clock.c 4
-rw-r--r-- libbcachefs/debug.c 119
-rw-r--r-- libbcachefs/debug.h 2
-rw-r--r-- libbcachefs/ec.c 88
-rw-r--r-- libbcachefs/ec.h 16
-rw-r--r-- libbcachefs/errcode.h 2
-rw-r--r-- libbcachefs/fs-io.c 4
-rw-r--r-- libbcachefs/io.c 4
-rw-r--r-- libbcachefs/move.c 40
-rw-r--r-- libbcachefs/move.h 1
-rw-r--r-- libbcachefs/move_types.h 6
-rw-r--r-- libbcachefs/movinggc.c 128
-rw-r--r-- libbcachefs/opts.h 28
-rw-r--r-- libbcachefs/reflink.c 6
-rw-r--r-- libbcachefs/util.c 4
-rw-r--r-- libbcachefs/util.h 23
31 files changed, 404 insertions, 195 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 48ce6994..2845be68 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-171da96d76d03a12872c8c9e2d02602c3ddfcb5f
+3856459b1b9f37cebee2bca3c9edcafaf393aa98
diff --git a/Makefile.compiler b/Makefile.compiler
index 20d353dc..3d8adfd3 100644
--- a/Makefile.compiler
+++ b/Makefile.compiler
@@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\
# gcc-min-version
# Usage: cflags-$(call gcc-min-version, 70100) += -foo
-gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y)
+gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1)
# clang-min-version
# Usage: cflags-$(call clang-min-version, 110000) += -foo
-clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y)
+clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ff122ff9..144e333e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -15,10 +15,12 @@
#include <stdlib.h>
#include <sys/mman.h>
+#define alloc_hooks(_do, ...) _do
+
#define ARCH_KMALLOC_MINALIGN 16
#define KMALLOC_MAX_SIZE SIZE_MAX
-static inline void *kmalloc(size_t size, gfp_t flags)
+static inline void *_kmalloc(size_t size, gfp_t flags)
{
unsigned i;
void *p;
@@ -44,6 +46,7 @@ static inline void *kmalloc(size_t size, gfp_t flags)
return p;
}
+#define kmalloc _kmalloc
static inline void *krealloc(void *old, size_t size, gfp_t flags)
{
@@ -94,7 +97,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
#define kvzalloc(size, flags) kzalloc(size, flags)
#define kvfree(p) kfree(p)
-static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
+static inline struct page *_alloc_pages(gfp_t flags, unsigned int order)
{
size_t size = PAGE_SIZE << order;
unsigned i;
@@ -114,9 +117,11 @@ static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
return p;
}
+#define alloc_pages _alloc_pages
#define alloc_page(gfp) alloc_pages(gfp, 0)
+#define _get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order))
#define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order))
#define __get_free_page(gfp) __get_free_pages(gfp, 0)
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 9592541f..5cb06ac5 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -212,9 +212,10 @@ bch2_acl_to_xattr(struct btree_trans *trans,
return xattr;
}
-struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
+struct posix_acl *bch2_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, int type)
{
- struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct btree_trans trans;
@@ -224,9 +225,6 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
struct bkey_s_c k;
int ret;
- if (rcu)
- return ERR_PTR(-ECHILD);
-
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -293,9 +291,10 @@ int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
}
int bch2_set_acl(struct user_namespace *mnt_userns,
- struct inode *vinode, struct posix_acl *_acl, int type)
+ struct dentry *dentry,
+ struct posix_acl *_acl, int type)
{
- struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_iter inode_iter = { NULL };
diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h
index 2d76a489..ac206f65 100644
--- a/libbcachefs/acl.h
+++ b/libbcachefs/acl.h
@@ -26,12 +26,12 @@ typedef struct {
__le32 a_version;
} bch_acl_header;
-struct posix_acl *bch2_get_acl(struct inode *, int, bool);
+struct posix_acl *bch2_get_acl(struct user_namespace *, struct dentry *, int);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
struct posix_acl *, int);
-int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int);
+int bch2_set_acl(struct user_namespace *, struct dentry *, struct posix_acl *, int);
int bch2_acl_chmod(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
umode_t, struct posix_acl **);
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 1aa7c7a0..c9ff590e 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -74,14 +74,21 @@ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
}
+#define DATA_TYPES_MOVABLE \
+ ((1U << BCH_DATA_btree)| \
+ (1U << BCH_DATA_user)| \
+ (1U << BCH_DATA_stripe))
+
+static inline bool data_type_movable(enum bch_data_type type)
+{
+ return (1U << type) & DATA_TYPES_MOVABLE;
+}
+
static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca)
{
- if (a.data_type != BCH_DATA_btree &&
- a.data_type != BCH_DATA_user)
- return 0;
-
- if (a.dirty_sectors >= ca->mi.bucket_size)
+ if (!data_type_movable(a.data_type) ||
+ a.dirty_sectors >= ca->mi.bucket_size)
return 0;
return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index b2755c1e..3a67ac0d 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -97,7 +97,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
if (ob->ec) {
- bch2_ec_bucket_written(c, ob);
+ ec_stripe_new_put(c, ob->ec);
return;
}
@@ -796,11 +796,11 @@ got_bucket:
ob->ec_idx = ec_idx;
ob->ec = h->s;
+ ec_stripe_new_get(h->s);
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
- atomic_inc(&h->s->pin);
out_put_head:
bch2_ec_stripe_head_put(c, h);
return ret;
@@ -1383,19 +1383,24 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str
unsigned data_type = ob->data_type;
barrier(); /* READ_ONCE() doesn't work on bitfields */
- prt_printf(out, "%zu ref %u %s%s%s %u:%llu gen %u\n",
+ prt_printf(out, "%zu ref %u %s %u:%llu gen %u",
ob - c->open_buckets,
atomic_read(&ob->pin),
data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
- ob->ec ? " ec" : "",
- ob->on_partial_list ? " partial" : "",
ob->dev, ob->bucket, ob->gen);
+ if (ob->ec)
+ prt_printf(out, " ec idx %llu", ob->ec->idx);
+ if (ob->on_partial_list)
+ prt_str(out, " partial");
+ prt_newline(out);
}
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
{
struct open_bucket *ob;
+ out->atomic++;
+
for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
@@ -1404,17 +1409,23 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
+
+ --out->atomic;
}
void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
{
unsigned i;
+ out->atomic++;
spin_lock(&c->freelist_lock);
+
for (i = 0; i < c->open_buckets_partial_nr; i++)
bch2_open_bucket_to_text(out, c,
c->open_buckets + c->open_buckets_partial[i]);
+
spin_unlock(&c->freelist_lock);
+ --out->atomic;
}
static const char * const bch2_write_point_states[] = {
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index 983572ef..2650bd63 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -497,7 +497,7 @@ static inline struct bpos bkey_unpack_pos(const struct btree *b,
/* Disassembled bkeys */
-static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+static inline struct bkey_s_c bkey_disassemble(const struct btree *b,
const struct bkey_packed *k,
struct bkey *u)
{
@@ -507,7 +507,7 @@ static inline struct bkey_s_c bkey_disassemble(struct btree *b,
}
/* non const version: */
-static inline struct bkey_s __bkey_disassemble(struct btree *b,
+static inline struct bkey_s __bkey_disassemble(const struct btree *b,
struct bkey_packed *k,
struct bkey *u)
{
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 3bd50f12..0216ad96 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -1536,9 +1536,9 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
/* Mergesort */
-void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats)
+void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats)
{
- struct bset_tree *t;
+ const struct bset_tree *t;
for_each_bset(b, t) {
enum bset_aux_tree_type type = bset_aux_tree_type(t);
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 2105e783..632c2b8c 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -213,7 +213,7 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
_k != btree_bkey_last(_b, _t); \
_k = bkey_p_next(_k))
-static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t)
{
return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
}
@@ -504,7 +504,7 @@ struct bset_stats {
size_t failed;
};
-void bch2_btree_keys_stats(struct btree *, struct bset_stats *);
+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *);
void bch2_bfloat_to_text(struct printbuf *, struct btree *,
struct bkey_packed *);
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index a26331db..e8530cce 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1202,7 +1202,7 @@ wait_on_io:
}
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
- struct btree *b)
+ const struct btree *b)
{
const struct bkey_format *f = &b->format;
struct bset_stats stats;
@@ -1247,7 +1247,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
stats.failed);
}
-void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
{
prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index a0b9231b..4900ed45 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -100,7 +100,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b)
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
- struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
+ const struct btree *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
#endif /* _BCACHEFS_BTREE_CACHE_H */
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 29163b46..7a9cc378 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -105,8 +105,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
vpfree(p, size);
}
-static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
- bool *used_mempool)
+static void *_btree_bounce_alloc(struct bch_fs *c, size_t size,
+ bool *used_mempool)
{
unsigned flags = memalloc_nofs_save();
void *p;
@@ -114,7 +114,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
BUG_ON(size > btree_bytes(c));
*used_mempool = false;
- p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+ p = _vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
if (!p) {
*used_mempool = true;
p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
@@ -122,6 +122,8 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
memalloc_nofs_restore(flags);
return p;
}
+#define btree_bounce_alloc(_c, _size, _used_mempool) \
+ alloc_hooks(_btree_bounce_alloc(_c, _size, _used_mempool), void *, NULL)
static void sort_bkey_ptrs(const struct btree *bt,
struct bkey_packed **ptrs, unsigned nr)
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index fdb267dd..2d344993 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1723,6 +1723,17 @@ err:
goto out;
}
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
+{
+ struct btree *b;
+
+ while (b = bch2_btree_iter_peek_node(iter),
+ bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
+ bch2_trans_begin(iter->trans);
+
+ return b;
+}
+
struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 448be089..6b7cef14 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -295,6 +295,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
@@ -521,18 +522,6 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
u32 bch2_trans_begin(struct btree_trans *);
-static inline struct btree *
-__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
-{
- struct btree *b;
-
- while (b = bch2_btree_iter_peek_node(iter),
- bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
- bch2_trans_begin(trans);
-
- return b;
-}
-
/*
* XXX
* this does not handle transaction restarts from bch2_btree_iter_next_node()
@@ -542,7 +531,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter
_locks_want, _depth, _flags, _b, _ret) \
for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
_start, _locks_want, _depth, _flags); \
- (_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\
+ (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \
!((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \
(_b) = bch2_btree_iter_next_node(&(_iter)))
diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c
index f3ffdbc3..00d0e672 100644
--- a/libbcachefs/clock.c
+++ b/libbcachefs/clock.c
@@ -122,7 +122,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
}
__set_current_state(TASK_RUNNING);
- del_singleshot_timer_sync(&wait.cpu_timer);
+ del_timer_sync(&wait.cpu_timer);
destroy_timer_on_stack(&wait.cpu_timer);
bch2_io_timer_del(clock, &wait.io_timer);
}
@@ -157,6 +157,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
unsigned long now;
unsigned i;
+ out->atomic++;
spin_lock(&clock->timer_lock);
now = atomic64_read(&clock->now);
@@ -165,6 +166,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
clock->timers.data[i]->fn,
clock->timers.data[i]->expire - now);
spin_unlock(&clock->timer_lock);
+ --out->atomic;
}
void bch2_io_clock_exit(struct io_clock *clock)
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 0035fe87..d1563caf 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -181,6 +181,125 @@ out:
bch2_btree_node_io_unlock(b);
}
+void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct btree *b)
+{
+ struct btree_node *n_ondisk = NULL;
+ struct extent_ptr_decoded pick;
+ struct bch_dev *ca;
+ struct bio *bio = NULL;
+ unsigned offset = 0;
+ int ret;
+
+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
+ prt_printf(out, "error getting device to read from: invalid device\n");
+ return;
+ }
+
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ if (!bch2_dev_get_ioref(ca, READ)) {
+ prt_printf(out, "error getting device to read from: not online\n");
+ return;
+ }
+
+ n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+ if (!n_ondisk) {
+ prt_printf(out, "memory allocation failure\n");
+ goto out;
+ }
+
+ bio = bio_alloc_bioset(ca->disk_sb.bdev,
+ buf_pages(n_ondisk, btree_bytes(c)),
+ REQ_OP_READ|REQ_META,
+ GFP_NOIO,
+ &c->btree_bio);
+ bio->bi_iter.bi_sector = pick.ptr.offset;
+ bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+
+ ret = submit_bio_wait(bio);
+ if (ret) {
+ prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
+ goto out;
+ }
+
+ while (offset < btree_sectors(c)) {
+ struct bset *i;
+ struct nonce nonce;
+ struct bch_csum csum;
+ struct bkey_packed *k;
+ unsigned sectors;
+
+ if (!offset) {
+ i = &n_ondisk->keys;
+
+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+ offset, BSET_CSUM_TYPE(i));
+ goto out;
+ }
+
+ nonce = btree_nonce(i, offset << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);
+
+ if (bch2_crc_cmp(csum, n_ondisk->csum)) {
+ prt_printf(out, "invalid checksum\n");
+ goto out;
+ }
+
+ bset_encrypt(c, i, offset << 9);
+
+ sectors = vstruct_sectors(n_ondisk, c->block_bits);
+ } else {
+ struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);
+
+ i = &bne->keys;
+
+ if (i->seq != n_ondisk->keys.seq)
+ break;
+
+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+ offset, BSET_CSUM_TYPE(i));
+ goto out;
+ }
+
+ nonce = btree_nonce(i, offset << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+ if (bch2_crc_cmp(csum, bne->csum)) {
+ prt_printf(out, "invalid checksum");
+ goto out;
+ }
+
+ bset_encrypt(c, i, offset << 9);
+
+ sectors = vstruct_sectors(bne, c->block_bits);
+ }
+
+ prt_printf(out, " offset %u version %u, journal seq %llu\n",
+ offset,
+ le16_to_cpu(i->version),
+ le64_to_cpu(i->journal_seq));
+ offset += sectors;
+
+ printbuf_indent_add(out, 4);
+
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
+ struct bkey u;
+
+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
+ prt_newline(out);
+ }
+
+ printbuf_indent_sub(out, 4);
+ }
+out:
+ if (bio)
+ bio_put(bio);
+ kvpfree(n_ondisk, btree_bytes(c));
+ percpu_ref_put(&ca->io_ref);
+}
+
#ifdef CONFIG_DEBUG_FS
/* XXX: bch_fs refcounting */
diff --git a/libbcachefs/debug.h b/libbcachefs/debug.h
index 0b86736e..2c37143b 100644
--- a/libbcachefs/debug.h
+++ b/libbcachefs/debug.h
@@ -9,6 +9,8 @@ struct btree;
struct bch_fs;
void __bch2_btree_verify(struct bch_fs *, struct btree *);
+void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *,
+ const struct btree *);
static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index c0342e60..7d43fd4a 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -213,8 +213,9 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
}
}
+/* XXX: this is a non-mempoolified memory allocation: */
static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
- unsigned offset, unsigned size)
+ unsigned offset, unsigned size)
{
struct bch_stripe *v = &buf->key.v;
unsigned csum_granularity = 1U << v->csum_granularity_bits;
@@ -241,7 +242,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
return 0;
err:
ec_stripe_buf_exit(buf);
- return -ENOMEM;
+ return -BCH_ERR_ENOMEM_stripe_buf;
}
/* Checksumming: */
@@ -914,6 +915,9 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
b = bch2_backpointer_get_node(trans, &node_iter, bucket, *bp_offset, bp);
bch2_trans_iter_exit(trans, &node_iter);
+ if (!b)
+ return 0;
+
prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
bch2_backpointer_to_text(&buf, &bp);
@@ -1099,6 +1103,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
}
BUG_ON(!s->allocated);
+ BUG_ON(!s->idx);
ec_generate_ec(&s->new_stripe);
@@ -1143,7 +1148,12 @@ err:
}
}
- bch2_stripe_close(c, s);
+ mutex_lock(&c->ec_stripe_new_lock);
+ list_del(&s->list);
+ mutex_unlock(&c->ec_stripe_new_lock);
+
+ if (s->idx)
+ bch2_stripe_close(c, s);
ec_stripe_buf_exit(&s->existing_stripe);
ec_stripe_buf_exit(&s->new_stripe);
@@ -1157,10 +1167,8 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list)
- if (!atomic_read(&s->pin)) {
- list_del(&s->list);
+ if (!atomic_read(&s->pin))
goto out;
- }
s = NULL;
out:
mutex_unlock(&c->ec_stripe_new_lock);
@@ -1188,14 +1196,6 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
}
-static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
-{
- BUG_ON(atomic_read(&s->pin) <= 0);
-
- if (atomic_dec_and_test(&s->pin))
- bch2_ec_do_stripe_creates(c);
-}
-
static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s = h->s;
@@ -1212,14 +1212,6 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
ec_stripe_new_put(c, s);
}
-/* have a full bucket - hand it off to be erasure coded: */
-void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
-{
- struct ec_stripe_new *s = ob->ec;
-
- ec_stripe_new_put(c, s);
-}
-
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
{
struct ec_stripe_new *s = ob->ec;
@@ -1236,6 +1228,8 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
if (!ob)
return NULL;
+ BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]);
+
ca = bch_dev_bkey_exists(c, ob->dev);
offset = ca->mi.bucket_size - ob->sectors_free;
@@ -1436,6 +1430,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
bool have_cache = true;
int ret = 0;
+ BUG_ON(h->s->new_stripe.key.v.nr_blocks != h->s->nr_data + h->s->nr_parity);
+ BUG_ON(h->s->new_stripe.key.v.nr_redundant != h->s->nr_parity);
+
for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
__clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
if (i < h->s->nr_data)
@@ -1546,9 +1543,13 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
s64 idx;
int ret;
+ /*
+ * If we can't allocate a new stripe, and there's no stripes with empty
+ * blocks for us to reuse, that means we have to wait on copygc:
+ */
idx = get_existing_stripe(c, h);
if (idx < 0)
- return -BCH_ERR_ENOSPC_stripe_reuse;
+ return -BCH_ERR_stripe_alloc_blocked;
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
if (ret) {
@@ -1558,12 +1559,14 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
return ret;
}
- if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
- /*
- * this is a problem: we have deleted from the
- * stripes heap already
- */
- BUG();
+ BUG_ON(h->s->existing_stripe.key.v.nr_redundant != h->s->nr_parity);
+ h->s->nr_data = h->s->existing_stripe.key.v.nr_blocks -
+ h->s->existing_stripe.key.v.nr_redundant;
+
+ ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+ if (ret) {
+ bch2_stripe_close(c, h->s);
+ return ret;
}
BUG_ON(h->s->existing_stripe.size != h->blocksize);
@@ -1675,9 +1678,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
bch_err(c, "failed to allocate new stripe");
goto err;
}
-
- if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
- BUG();
}
if (h->s->allocated)
@@ -1690,7 +1690,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h);
if (!ret)
- goto allocated;
+ goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, ENOMEM))
goto err;
@@ -1703,8 +1703,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
ret = __bch2_ec_stripe_head_reuse(trans, h);
if (!ret)
break;
- if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
- ret = -BCH_ERR_stripe_alloc_blocked;
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
@@ -1723,10 +1721,16 @@ alloc_existing:
ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
if (ret)
goto err;
-allocated:
+
+allocate_buf:
+ ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
+ if (ret)
+ goto err;
+
h->s->allocated = true;
+allocated:
BUG_ON(!h->s->idx);
-
+ BUG_ON(!h->s->new_stripe.data[0]);
BUG_ON(trans->restarted);
return h;
err:
@@ -1839,8 +1843,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
h->target, h->algo, h->redundancy);
if (h->s)
- prt_printf(out, "\tpending: blocks %u+%u allocated %u\n",
- h->s->nr_data, h->s->nr_parity,
+ prt_printf(out, "\tpending: idx %llu blocks %u+%u allocated %u\n",
+ h->s->idx, h->s->nr_data, h->s->nr_parity,
bitmap_weight(h->s->blocks_allocated,
h->s->nr_data));
}
@@ -1848,9 +1852,9 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
- prt_printf(out, "\tin flight: blocks %u+%u pin %u\n",
- s->nr_data, s->nr_parity,
- atomic_read(&s->pin));
+ prt_printf(out, "\tin flight: idx %llu blocks %u+%u pin %u\n",
+ s->idx, s->nr_data, s->nr_parity,
+ atomic_read(&s->pin));
}
mutex_unlock(&c->ec_stripe_new_lock);
}
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 56d1b5e7..d112aea9 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -198,7 +198,6 @@ int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *);
void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
-void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *);
void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
@@ -213,6 +212,21 @@ void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
void bch2_do_stripe_deletes(struct bch_fs *);
+void bch2_ec_do_stripe_creates(struct bch_fs *);
+
+static inline void ec_stripe_new_get(struct ec_stripe_new *s)
+{
+ atomic_inc(&s->pin);
+}
+
+static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
+{
+ BUG_ON(atomic_read(&s->pin) <= 0);
+ BUG_ON(!s->err && !s->idx);
+
+ if (atomic_dec_and_test(&s->pin))
+ bch2_ec_do_stripe_creates(c);
+}
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 283303db..162e3156 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -3,11 +3,11 @@
#define _BCACHEFS_ERRCODE_H
#define BCH_ERRCODES() \
+ x(ENOMEM, ENOMEM_stripe_buf) \
x(ENOSPC, ENOSPC_disk_reservation) \
x(ENOSPC, ENOSPC_bucket_alloc) \
x(ENOSPC, ENOSPC_disk_label_add) \
x(ENOSPC, ENOSPC_stripe_create) \
- x(ENOSPC, ENOSPC_stripe_reuse) \
x(ENOSPC, ENOSPC_inode_create) \
x(ENOSPC, ENOSPC_str_hash_create) \
x(ENOSPC, ENOSPC_snapshot_create) \
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index e088bbcc..b511735b 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -1217,7 +1217,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
bch2_page_state_create(page, __GFP_NOFAIL);
- bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
+ rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
rbio->bio.bi_iter.bi_sector =
(sector_t) page->index << PAGE_SECTORS_SHIFT;
BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
@@ -2017,7 +2017,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
&c->bio_read);
bio->bi_end_io = bch2_direct_IO_read_split_endio;
start:
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+ bio->bi_opf = REQ_OP_READ|REQ_SYNC;
bio->bi_iter.bi_sector = offset >> 9;
bio->bi_private = dio;
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index de30daca..ea0fd631 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -835,6 +835,10 @@ static void bch2_write_index(struct closure *cl)
struct write_point *wp = op->wp;
struct workqueue_struct *wq = index_update_wq(op);
+ if ((op->flags & BCH_WRITE_DONE) &&
+ (op->flags & BCH_WRITE_MOVE))
+ bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
+
barrier();
/*
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 11ea109f..5e952d6c 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -41,18 +41,19 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
}
struct moving_io {
- struct list_head list;
- struct closure cl;
- bool read_completed;
+ struct list_head list;
+ struct move_bucket_in_flight *b;
+ struct closure cl;
+ bool read_completed;
- unsigned read_sectors;
- unsigned write_sectors;
+ unsigned read_sectors;
+ unsigned write_sectors;
- struct bch_read_bio rbio;
+ struct bch_read_bio rbio;
- struct data_update write;
+ struct data_update write;
/* Must be last since it is variable size */
- struct bio_vec bi_inline_vecs[0];
+ struct bio_vec bi_inline_vecs[0];
};
static void move_free(struct moving_io *io)
@@ -60,6 +61,9 @@ static void move_free(struct moving_io *io)
struct moving_context *ctxt = io->write.ctxt;
struct bch_fs *c = ctxt->c;
+ if (io->b)
+ atomic_dec(&io->b->count);
+
bch2_data_update_exit(&io->write);
wake_up(&ctxt->wait);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
@@ -235,6 +239,7 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
static int bch2_move_extent(struct btree_trans *trans,
struct btree_iter *iter,
struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
struct bch_io_opts io_opts,
enum btree_id btree_id,
struct bkey_s_c k,
@@ -295,7 +300,7 @@ static int bch2_move_extent(struct btree_trans *trans,
bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
io->rbio.bio.bi_iter.bi_size = sectors << 9;
- bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
+ io->rbio.bio.bi_opf = REQ_OP_READ;
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
@@ -320,6 +325,11 @@ static int bch2_move_extent(struct btree_trans *trans,
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
}
+ if (bucket_in_flight) {
+ io->b = bucket_in_flight;
+ atomic_inc(&io->b->count);
+ }
+
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
trace_move_extent_read(k.k);
@@ -522,8 +532,8 @@ static int __bch2_move_data(struct moving_context *ctxt,
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
- ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
- btree_id, k, data_opts);
+ ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
+ io_opts, btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
@@ -665,6 +675,7 @@ failed_to_evacuate:
int __bch2_evacuate_bucket(struct btree_trans *trans,
struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
struct bpos bucket, int gen,
struct data_update_opts _data_opts)
{
@@ -753,8 +764,9 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
i++;
}
- ret = bch2_move_extent(trans, &iter, ctxt, io_opts,
- bp.btree_id, k, data_opts);
+ ret = bch2_move_extent(trans, &iter, ctxt,
+ bucket_in_flight,
+ io_opts, bp.btree_id, k, data_opts);
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -834,7 +846,7 @@ int bch2_evacuate_bucket(struct bch_fs *c,
bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
- ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts);
+ ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
bch2_moving_ctxt_exit(&ctxt);
bch2_trans_exit(&trans);
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 3b283af3..4c001387 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -70,6 +70,7 @@ int bch2_move_data(struct bch_fs *,
int __bch2_evacuate_bucket(struct btree_trans *,
struct moving_context *,
+ struct move_bucket_in_flight *,
struct bpos, int,
struct data_update_opts);
int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h
index 9df6d181..285ffdb7 100644
--- a/libbcachefs/move_types.h
+++ b/libbcachefs/move_types.h
@@ -16,4 +16,10 @@ struct bch_move_stats {
atomic64_t sectors_raced;
};
+struct move_bucket_in_flight {
+ struct bpos bucket;
+ u8 gen;
+ atomic_t count;
+};
+
#endif /* _BCACHEFS_MOVE_TYPES_H */
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 80f92276..79aaa45f 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -56,8 +56,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &_a);
*gen = a->gen;
- ret = (a->data_type == BCH_DATA_btree ||
- a->data_type == BCH_DATA_user) &&
+ ret = data_type_movable(a->data_type) &&
a->fragmentation_lru &&
a->fragmentation_lru <= time;
@@ -72,47 +71,44 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
return ret;
}
-struct copygc_bucket_in_flight {
- struct bpos bucket;
- u8 gen;
- struct moving_context ctxt;
-};
-
-typedef FIFO(struct copygc_bucket_in_flight) copygc_buckets_in_flight;
+typedef FIFO(struct move_bucket_in_flight) move_buckets_in_flight;
-struct copygc_bucket {
+struct move_bucket {
struct bpos bucket;
u8 gen;
};
-typedef DARRAY(struct copygc_bucket) copygc_buckets;
+typedef DARRAY(struct move_bucket) move_buckets;
-static int copygc_bucket_cmp(const void *_l, const void *_r)
+static int move_bucket_cmp(const void *_l, const void *_r)
{
- const struct copygc_bucket *l = _l;
- const struct copygc_bucket *r = _r;
+ const struct move_bucket *l = _l;
+ const struct move_bucket *r = _r;
return bpos_cmp(l->bucket, r->bucket) ?: cmp_int(l->gen, r->gen);
}
-static bool bucket_in_flight(copygc_buckets *buckets_sorted, struct copygc_bucket b)
+static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)
{
return bsearch(&b,
buckets_sorted->data,
buckets_sorted->nr,
sizeof(buckets_sorted->data[0]),
- copygc_bucket_cmp) != NULL;
+ move_bucket_cmp) != NULL;
}
-static void copygc_buckets_wait(struct btree_trans *trans,
- copygc_buckets_in_flight *buckets_in_flight,
- size_t nr, bool verify_evacuated)
+static void move_buckets_wait(struct btree_trans *trans,
+ struct moving_context *ctxt,
+ move_buckets_in_flight *buckets_in_flight,
+ size_t nr, bool verify_evacuated)
{
while (!fifo_empty(buckets_in_flight)) {
- struct copygc_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);
+ struct move_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);
- if (fifo_used(buckets_in_flight) <= nr &&
- closure_nr_remaining(&i->ctxt.cl) != 1)
+ if (fifo_used(buckets_in_flight) > nr)
+ move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));
+
+ if (atomic_read(&i->count))
break;
/*
@@ -120,31 +116,34 @@ static void copygc_buckets_wait(struct btree_trans *trans,
* reads, which inits another btree_trans; this one must be
* unlocked:
*/
- bch2_trans_unlock(trans);
- bch2_moving_ctxt_exit(&i->ctxt);
if (verify_evacuated)
bch2_verify_bucket_evacuated(trans, i->bucket, i->gen);
buckets_in_flight->front++;
}
+
+ bch2_trans_unlock(trans);
}
static int bch2_copygc_get_buckets(struct btree_trans *trans,
- copygc_buckets_in_flight *buckets_in_flight,
- copygc_buckets *buckets)
+ struct moving_context *ctxt,
+ move_buckets_in_flight *buckets_in_flight,
+ move_buckets *buckets)
{
struct btree_iter iter;
- copygc_buckets buckets_sorted = { 0 };
- struct copygc_bucket_in_flight *i;
+ move_buckets buckets_sorted = { 0 };
+ struct move_bucket_in_flight *i;
struct bkey_s_c k;
- size_t fifo_iter;
+ size_t fifo_iter, nr_to_get;
int ret;
- copygc_buckets_wait(trans, buckets_in_flight, buckets_in_flight->size / 2, true);
+ move_buckets_wait(trans, ctxt, buckets_in_flight, buckets_in_flight->size / 2, true);
+
+ nr_to_get = max(16UL, fifo_used(buckets_in_flight) / 4);
fifo_for_each_entry_ptr(i, buckets_in_flight, fifo_iter) {
- ret = darray_push(&buckets_sorted, ((struct copygc_bucket) {i->bucket, i->gen}));
+ ret = darray_push(&buckets_sorted, ((struct move_bucket) {i->bucket, i->gen}));
if (ret) {
- bch_err(trans->c, "error allocating copygc_buckets_sorted");
+ bch_err(trans->c, "error allocating move_buckets_sorted");
goto err;
}
}
@@ -152,19 +151,19 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
sort(buckets_sorted.data,
buckets_sorted.nr,
sizeof(buckets_sorted.data[0]),
- copygc_bucket_cmp,
+ move_bucket_cmp,
NULL);
ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
- struct copygc_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
+ struct move_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
int ret = 0;
if (!bucket_in_flight(&buckets_sorted, b) &&
bch2_bucket_is_movable(trans, b.bucket, lru_pos_time(k.k->p), &b.gen))
- ret = darray_push(buckets, b) ?: buckets->nr >= fifo_free(buckets_in_flight);
+ ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
ret;
}));
@@ -175,16 +174,17 @@ err:
}
static int bch2_copygc(struct btree_trans *trans,
- copygc_buckets_in_flight *buckets_in_flight,
- struct bch_move_stats *stats)
+ struct moving_context *ctxt,
+ move_buckets_in_flight *buckets_in_flight)
{
struct bch_fs *c = trans->c;
+ struct bch_move_stats move_stats;
struct data_update_opts data_opts = {
.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
};
- copygc_buckets buckets = { 0 };
- struct copygc_bucket_in_flight *f;
- struct copygc_bucket *i;
+ move_buckets buckets = { 0 };
+ struct move_bucket_in_flight *f;
+ struct move_bucket *i;
int ret = 0;
ret = bch2_btree_write_buffer_flush(trans);
@@ -192,7 +192,10 @@ static int bch2_copygc(struct btree_trans *trans,
__func__, bch2_err_str(ret)))
return ret;
- ret = bch2_copygc_get_buckets(trans, buckets_in_flight, &buckets);
+ bch2_move_stats_init(&move_stats, "copygc");
+ ctxt->stats = &move_stats;
+
+ ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
if (ret)
goto err;
@@ -203,11 +206,9 @@ static int bch2_copygc(struct btree_trans *trans,
f = fifo_push_ref(buckets_in_flight);
f->bucket = i->bucket;
f->gen = i->gen;
- bch2_moving_ctxt_init(&f->ctxt, c, NULL, NULL, //stats,
- writepoint_ptr(&c->copygc_write_point),
- false);
+ atomic_set(&f->count, 0);
- ret = __bch2_evacuate_bucket(trans, &f->ctxt, f->bucket, f->gen, data_opts);
+ ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket, f->gen, data_opts);
if (ret)
goto err;
}
@@ -221,7 +222,8 @@ err:
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
- trace_and_count(c, copygc, c, atomic64_read(&stats->sectors_moved), 0, 0, 0);
+ trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
+ ctxt->stats = NULL;
return ret;
}
@@ -244,13 +246,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
struct bch_dev *ca;
unsigned dev_idx;
s64 wait = S64_MAX, fragmented_allowed, fragmented;
+ unsigned i;
for_each_rw_member(ca, c, dev_idx) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_stripe) *
ca->mi.bucket_size) >> 1);
- fragmented = usage.d[BCH_DATA_user].fragmented;
+ fragmented = 0;
+
+ for (i = 0; i < BCH_DATA_NR; i++)
+ if (data_type_movable(i))
+ fragmented += usage.d[i].fragmented;
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
}
@@ -274,32 +281,34 @@ static int bch2_copygc_thread(void *arg)
{
struct bch_fs *c = arg;
struct btree_trans trans;
- struct bch_move_stats move_stats;
+ struct moving_context ctxt;
struct io_clock *clock = &c->io_clock[WRITE];
- copygc_buckets_in_flight copygc_buckets;
+ move_buckets_in_flight move_buckets;
u64 last, wait;
int ret = 0;
- if (!init_fifo(&copygc_buckets, 1 << 14, GFP_KERNEL)) {
+ if (!init_fifo(&move_buckets, 1 << 14, GFP_KERNEL)) {
bch_err(c, "error allocating copygc buckets in flight");
return -ENOMEM;
}
set_freezable();
- bch2_move_stats_init(&move_stats, "copygc");
bch2_trans_init(&trans, c, 0, 0);
+ bch2_moving_ctxt_init(&ctxt, c, NULL, NULL,
+ writepoint_ptr(&c->copygc_write_point),
+ false);
while (!ret && !kthread_should_stop()) {
bch2_trans_unlock(&trans);
-
- try_to_freeze();
cond_resched();
- kthread_wait(freezing(current) || c->copy_gc_enabled);
+ if (!c->copy_gc_enabled) {
+ move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
+ kthread_wait_freezable(c->copy_gc_enabled);
+ }
if (unlikely(freezing(current))) {
- copygc_buckets_wait(&trans, &copygc_buckets, 0, true);
- bch2_trans_unlock(&trans);
+ move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
__refrigerator(false);
continue;
}
@@ -308,6 +317,7 @@ static int bch2_copygc_thread(void *arg)
wait = bch2_copygc_wait_amount(c);
if (wait > clock->max_slop) {
+ move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
c->copygc_wait = last + wait;
bch2_kthread_io_clock_wait(clock, last + wait,
@@ -318,15 +328,15 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
c->copygc_running = true;
- ret = bch2_copygc(&trans, &copygc_buckets, &move_stats);
+ ret = bch2_copygc(&trans, &ctxt, &move_buckets);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
}
- copygc_buckets_wait(&trans, &copygc_buckets, 0, !ret);
- free_fifo(&copygc_buckets);
+ bch2_moving_ctxt_exit(&ctxt);
bch2_trans_exit(&trans);
+ free_fifo(&move_buckets);
return 0;
}
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 76c2691a..afbf82d6 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -329,22 +329,22 @@ enum opt_type {
x(norecovery, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Don't replay the journal") \
x(keep_journal, u8, \
0, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Don't free journal entries/keys after startup")\
x(read_entire_journal, u8, \
0, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Read all journal entries, not just dirty ones")\
x(read_journal_only, u8, \
0, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Only read the journal, skip the rest of recovery")\
x(journal_transaction_names, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
@@ -354,7 +354,7 @@ enum opt_type {
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Don't open device in exclusive mode") \
x(direct_io, u8, \
OPT_FS|OPT_MOUNT, \
@@ -364,38 +364,38 @@ enum opt_type {
x(sb, u64, \
OPT_MOUNT, \
OPT_UINT(0, S64_MAX), \
- BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
+ BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
"offset", "Sector offset of superblock") \
x(read_only, u8, \
OPT_FS, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, NULL) \
x(nostart, u8, \
0, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Don\'t start filesystem, only open devices") \
x(reconstruct_alloc, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Reconstruct alloc btree") \
x(version_upgrade, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Set superblock to latest version,\n" \
"allowing any new features to be used") \
x(buckets_nouse, u8, \
0, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Allocate the buckets_nouse bitmap") \
x(project, u8, \
OPT_INODE, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, NULL) \
x(nocow, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \
@@ -411,9 +411,9 @@ enum opt_type {
NULL, "Enable nocow mode: enables runtime locking in\n"\
"data move path needed if nocow will ever be in use\n")\
x(no_data_io, u8, \
- OPT_FS|OPT_MOUNT, \
+ OPT_MOUNT, \
OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ BCH2_NO_SB_OPT, false, \
NULL, "Skip submit_bio() for data reads and writes, " \
"for performance testing purposes") \
x(fs_size, u64, \
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 87446f7b..d2e6adc1 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -233,7 +233,13 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
orig->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(orig);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+
+ /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+ __underlying_memset(&r_p->v, 0, sizeof(r_p->v));
+#else
memset(&r_p->v, 0, sizeof(r_p->v));
+#endif
r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index bf5ffb47..56c21c61 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -761,10 +761,10 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
}
}
-int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
+int _bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
{
while (size) {
- struct page *page = alloc_page(gfp_mask);
+ struct page *page = _alloc_pages(gfp_mask, 0);
unsigned len = min_t(size_t, PAGE_SIZE, size);
if (!page)
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index d994c157..ecfe5401 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -60,12 +60,14 @@ static inline void vpfree(void *p, size_t size)
free_pages((unsigned long) p, get_order(size));
}
-static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
+static inline void *_vpmalloc(size_t size, gfp_t gfp_mask)
{
- return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
+ return (void *) _get_free_pages(gfp_mask|__GFP_NOWARN,
get_order(size)) ?:
__vmalloc(size, gfp_mask);
}
+#define vpmalloc(_size, _gfp) \
+ alloc_hooks(_vpmalloc(_size, _gfp), void *, NULL)
static inline void kvpfree(void *p, size_t size)
{
@@ -75,12 +77,14 @@ static inline void kvpfree(void *p, size_t size)
vpfree(p, size);
}
-static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
+static inline void *_kvpmalloc(size_t size, gfp_t gfp_mask)
{
return size < PAGE_SIZE
- ? kmalloc(size, gfp_mask)
- : vpmalloc(size, gfp_mask);
+ ? _kmalloc(size, gfp_mask)
+ : _vpmalloc(size, gfp_mask);
}
+#define kvpmalloc(_size, _gfp) \
+ alloc_hooks(_kvpmalloc(_size, _gfp), void *, NULL)
int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
@@ -530,7 +534,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
}
void bch2_bio_map(struct bio *bio, void *base, size_t);
-int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+int _bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+#define bch2_bio_alloc_pages(_bio, _size, _gfp) \
+ alloc_hooks(_bch2_bio_alloc_pages(_bio, _size, _gfp), int, -ENOMEM)
static inline sector_t bdev_sectors(struct block_device *bdev)
{
@@ -566,11 +572,9 @@ do { \
#define kthread_wait_freezable(cond) \
({ \
int _ret = 0; \
- bool frozen; \
- \
while (1) { \
set_current_state(TASK_INTERRUPTIBLE); \
- if (kthread_freezable_should_stop(&frozen)) { \
+ if (kthread_should_stop()) { \
_ret = -1; \
break; \
} \
@@ -579,6 +583,7 @@ do { \
break; \
\
schedule(); \
+ try_to_freeze(); \
} \
set_current_state(TASK_RUNNING); \
_ret; \