about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2023-02-26 21:36:39 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-02-27 21:36:36 -0500
commit 30cca2e94d0dfa8c3151daf1393f402d32bb9407 (patch)
tree cbf1b7ccfdcaace597389147c27031c0e8106f12
parent bf359ac1ad97929e1023632aafb272d655504516 (diff)
download bcachefs-tools-30cca2e94d0dfa8c3151daf1393f402d32bb9407.tar.gz
Update bcachefs sources to ca97ee3577 bcachefs: bch2_btree_iter_peek_and_restart_outlined()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- include/linux/blkdev.h 1
-rw-r--r-- include/trace/events/bcachefs.h 20
-rw-r--r-- libbcachefs/alloc_background.c 27
-rw-r--r-- libbcachefs/alloc_foreground.c 357
-rw-r--r-- libbcachefs/alloc_foreground.h 9
-rw-r--r-- libbcachefs/alloc_types.h 3
-rw-r--r-- libbcachefs/backpointers.c 31
-rw-r--r-- libbcachefs/bcachefs.h 6
-rw-r--r-- libbcachefs/btree_iter.c 12
-rw-r--r-- libbcachefs/btree_iter.h 2
-rw-r--r-- libbcachefs/btree_write_buffer.c 9
-rw-r--r-- libbcachefs/data_update.c 54
-rw-r--r-- libbcachefs/ec.c 131
-rw-r--r-- libbcachefs/extents.c 70
-rw-r--r-- libbcachefs/extents.h 14
-rw-r--r-- libbcachefs/io.c 2
-rw-r--r-- libbcachefs/journal.c 3
-rw-r--r-- libbcachefs/keylist.c 16
-rw-r--r-- libbcachefs/keylist.h 1
-rw-r--r-- libbcachefs/lru.c 37
-rw-r--r-- libbcachefs/move.c 3
-rw-r--r-- libbcachefs/opts.h 6
-rw-r--r-- libbcachefs/sysfs.c 5
-rw-r--r-- linux/blkdev.c 8
25 files changed, 524 insertions, 305 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 93724d8b..44599a02 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-8e1519ccb62b76736d5b9ca97e58b41ed9a11274
+ca97ee357774427208e4c251bfaa5957ae7f8c2c
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 01b3d4ad..f78621d8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -70,6 +70,7 @@ static inline void submit_bio(struct bio *bio)
}
int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t);
+int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsigned);
#define bdev_get_queue(bdev) (&((bdev)->queue))
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index d1e2f979..ae184220 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -516,7 +516,6 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
DECLARE_EVENT_CLASS(bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
- bool user,
u64 bucket,
u64 free,
u64 avail,
@@ -525,14 +524,13 @@ DECLARE_EVENT_CLASS(bucket_alloc,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
- TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err),
TP_STRUCT__entry(
- __field(dev_t, dev )
+ __field(u8, dev )
__array(char, reserve, 16 )
- __field(bool, user )
__field(u64, bucket )
__field(u64, free )
__field(u64, avail )
@@ -548,9 +546,8 @@ DECLARE_EVENT_CLASS(bucket_alloc,
),
TP_fast_assign(
- __entry->dev = ca->dev;
+ __entry->dev = ca->dev_idx;
strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
- __entry->user = user;
__entry->bucket = bucket;
__entry->free = free;
__entry->avail = avail;
@@ -565,10 +562,9 @@ DECLARE_EVENT_CLASS(bucket_alloc,
strscpy(__entry->err, err, sizeof(__entry->err));
),
- TP_printk("%d,%d reserve %s user %u bucket %llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
+ TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
__entry->reserve,
- __entry->user,
+ __entry->dev,
__entry->bucket,
__entry->free,
__entry->avail,
@@ -585,7 +581,6 @@ DECLARE_EVENT_CLASS(bucket_alloc,
DEFINE_EVENT(bucket_alloc, bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
- bool user,
u64 bucket,
u64 free,
u64 avail,
@@ -594,14 +589,13 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
- TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err)
);
DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
- bool user,
u64 bucket,
u64 free,
u64 avail,
@@ -610,7 +604,7 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
- TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err)
);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index b39a4533..5f4bb82c 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -2175,21 +2175,24 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
}
mutex_unlock(&c->btree_reserve_cache_lock);
- while (1) {
- struct open_bucket *ob;
-
- spin_lock(&c->freelist_lock);
- if (!ca->open_buckets_partial_nr) {
+ spin_lock(&c->freelist_lock);
+ i = 0;
+ while (i < c->open_buckets_partial_nr) {
+ struct open_bucket *ob =
+ c->open_buckets + c->open_buckets_partial[i];
+
+ if (ob->dev == ca->dev_idx) {
+ swap(c->open_buckets_partial[i],
+ c->open_buckets_partial[--c->open_buckets_partial_nr]);
+ ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
- break;
+ bch2_open_bucket_put(c, ob);
+ spin_lock(&c->freelist_lock);
+ } else {
+ i++;
}
- ob = c->open_buckets +
- ca->open_buckets_partial[--ca->open_buckets_partial_nr];
- ob->on_partial_list = false;
- spin_unlock(&c->freelist_lock);
-
- bch2_open_bucket_put(c, ob);
}
+ spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index affddf1f..023b62c5 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -154,26 +154,17 @@ static void open_bucket_free_unused(struct bch_fs *c,
struct write_point *wp,
struct open_bucket *ob)
{
- struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
- bool may_realloc = wp->data_type == BCH_DATA_user;
-
- BUG_ON(ca->open_buckets_partial_nr >
- ARRAY_SIZE(ca->open_buckets_partial));
-
- if (ca->open_buckets_partial_nr <
- ARRAY_SIZE(ca->open_buckets_partial) &&
- may_realloc) {
- spin_lock(&c->freelist_lock);
- ob->on_partial_list = true;
- ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
- ob - c->open_buckets;
- spin_unlock(&c->freelist_lock);
+ BUG_ON(c->open_buckets_partial_nr >=
+ ARRAY_SIZE(c->open_buckets_partial));
- closure_wake_up(&c->open_buckets_wait);
- closure_wake_up(&c->freelist_wait);
- } else {
- bch2_open_bucket_put(c, ob);
- }
+ spin_lock(&c->freelist_lock);
+ ob->on_partial_list = true;
+ c->open_buckets_partial[c->open_buckets_partial_nr++] =
+ ob - c->open_buckets;
+ spin_unlock(&c->freelist_lock);
+
+ closure_wake_up(&c->open_buckets_wait);
+ closure_wake_up(&c->freelist_wait);
}
/* _only_ for allocating the journal on a new device: */
@@ -259,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ob->valid = true;
ob->sectors_free = ca->mi.bucket_size;
- ob->alloc_reserve = reserve;
ob->dev = ca->dev_idx;
ob->gen = a->gen;
ob->bucket = bucket;
@@ -386,32 +376,6 @@ err:
return ob;
}
-static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
- enum alloc_reserve reserve)
-{
- struct open_bucket *ob;
- int i;
-
- spin_lock(&c->freelist_lock);
-
- for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
- ob = c->open_buckets + ca->open_buckets_partial[i];
-
- if (reserve <= ob->alloc_reserve) {
- array_remove_item(ca->open_buckets_partial,
- ca->open_buckets_partial_nr,
- i);
- ob->on_partial_list = false;
- ob->alloc_reserve = reserve;
- spin_unlock(&c->freelist_lock);
- return ob;
- }
- }
-
- spin_unlock(&c->freelist_lock);
- return NULL;
-}
-
/*
* This path is for before the freespace btree is initialized:
*
@@ -535,7 +499,6 @@ again:
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
- bool may_alloc_partial,
struct closure *cl,
struct bch_dev_usage *usage)
{
@@ -574,12 +537,6 @@ again:
if (waiting)
closure_wake_up(&c->freelist_wait);
-
- if (may_alloc_partial) {
- ob = try_alloc_partial_bucket(c, ca, reserve);
- if (ob)
- return ob;
- }
alloc:
ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
@@ -599,7 +556,6 @@ err:
if (!IS_ERR(ob))
trace_and_count(c, bucket_alloc, ca,
bch2_alloc_reserves[reserve],
- may_alloc_partial,
ob->bucket,
usage->d[BCH_DATA_free].buckets,
avail,
@@ -611,7 +567,6 @@ err:
else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
trace_and_count(c, bucket_alloc_fail, ca,
bch2_alloc_reserves[reserve],
- may_alloc_partial,
0,
usage->d[BCH_DATA_free].buckets,
avail,
@@ -626,7 +581,6 @@ err:
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve,
- bool may_alloc_partial,
struct closure *cl)
{
struct bch_dev_usage usage;
@@ -634,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
- may_alloc_partial, cl, &usage)));
+ cl, &usage)));
return ob;
}
@@ -691,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
}
-#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0)
-#define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
-
-static void add_new_bucket(struct bch_fs *c,
+static int add_new_bucket(struct bch_fs *c,
struct open_buckets *ptrs,
struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
@@ -705,12 +657,19 @@ static void add_new_bucket(struct bch_fs *c,
unsigned durability =
bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+ BUG_ON(*nr_effective >= nr_replicas);
+
__clear_bit(ob->dev, devs_may_alloc->d);
- *nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY)
- ? durability : 1;
+ *nr_effective += durability;
*have_cache |= !durability;
ob_push(c, ptrs, ob);
+
+ if (*nr_effective >= nr_replicas)
+ return 1;
+ if (ob->ec)
+ return 1;
+ return 0;
}
int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
@@ -720,8 +679,8 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
+ enum bch_data_type data_type,
enum alloc_reserve reserve,
- unsigned flags,
struct closure *cl)
{
struct bch_fs *c = trans->c;
@@ -754,8 +713,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
- ob = bch2_bucket_alloc_trans(trans, ca, reserve,
- flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
+ ob = bch2_bucket_alloc_trans(trans, ca, reserve, cl, &usage);
if (!IS_ERR(ob))
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
percpu_ref_put(&ca->ref);
@@ -767,10 +725,11 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
- add_new_bucket(c, ptrs, devs_may_alloc,
- nr_effective, have_cache, flags, ob);
+ ob->data_type = data_type;
- if (*nr_effective >= nr_replicas) {
+ if (add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, 0, ob)) {
ret = 0;
break;
}
@@ -792,7 +751,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
u16 target,
- unsigned erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
@@ -805,9 +763,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct open_bucket *ob;
struct bch_dev *ca;
unsigned i, ec_idx;
-
- if (!erasure_code)
- return 0;
+ int ret = 0;
if (nr_replicas < 2)
return 0;
@@ -842,54 +798,122 @@ got_bucket:
ob->ec_idx = ec_idx;
ob->ec = h->s;
- add_new_bucket(c, ptrs, devs_may_alloc,
- nr_effective, have_cache, flags, ob);
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
atomic_inc(&h->s->pin);
out_put_head:
bch2_ec_stripe_head_put(c, h);
- return 0;
+ return ret;
}
/* Sector allocator */
-static void get_buckets_from_writepoint(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- unsigned flags,
- bool need_ec)
+static bool want_bucket(struct bch_fs *c,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ bool *have_cache, bool ec,
+ struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+ if (!test_bit(ob->dev, devs_may_alloc->d))
+ return false;
+
+ if (ob->data_type != wp->data_type)
+ return false;
+
+ if (!ca->mi.durability &&
+ (wp->data_type != BCH_DATA_user || !*have_cache))
+ return false;
+
+ if (ec != (ob->ec != NULL))
+ return false;
+
+ return true;
+}
+
+static int bucket_alloc_set_writepoint(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ bool ec, unsigned flags)
{
struct open_buckets ptrs_skip = { .nr = 0 };
struct open_bucket *ob;
unsigned i;
+ int ret = 0;
open_bucket_for_each(c, &wp->ptrs, ob, i) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-
- if (*nr_effective < nr_replicas &&
- test_bit(ob->dev, devs_may_alloc->d) &&
- (ca->mi.durability ||
- (wp->data_type == BCH_DATA_user && !*have_cache)) &&
- (ob->ec || !need_ec)) {
- add_new_bucket(c, ptrs, devs_may_alloc,
- nr_effective, have_cache,
- flags, ob);
- } else {
+ if (!ret && want_bucket(c, wp, devs_may_alloc,
+ have_cache, ec, ob))
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
+ else
ob_push(c, &ptrs_skip, ob);
- }
}
wp->ptrs = ptrs_skip;
+
+ return ret;
}
-static int open_bucket_add_buckets(struct btree_trans *trans,
+static int bucket_alloc_set_partial(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache, bool ec,
+ enum alloc_reserve reserve,
+ unsigned flags)
+{
+ int i, ret = 0;
+
+ if (!c->open_buckets_partial_nr)
+ return 0;
+
+ spin_lock(&c->freelist_lock);
+
+ for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+ struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+ if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ struct bch_dev_usage usage;
+ u64 avail;
+
+ bch2_dev_usage_read_fast(ca, &usage);
+ avail = dev_buckets_free(ca, usage, reserve);
+ if (!avail)
+ continue;
+
+ array_remove_item(c->open_buckets_partial,
+ c->open_buckets_partial_nr,
+ i);
+ ob->on_partial_list = false;
+
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
+ if (ret)
+ break;
+ }
+ }
+
+ spin_unlock(&c->freelist_lock);
+ return ret;
+}
+
+static int __open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_list *devs_have,
u16 target,
- unsigned erasure_code,
+ bool erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
@@ -901,8 +925,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
- int ret;
unsigned i;
+ int ret;
rcu_read_lock();
devs = target_rw_devs(c, wp->data_type, target);
@@ -915,52 +939,82 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d);
+ if (erasure_code && ec_open_bucket(c, ptrs))
+ return 0;
+
+ ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
+ nr_replicas, nr_effective,
+ have_cache, erasure_code, flags);
+ if (ret)
+ return ret;
+
+ ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
+ nr_replicas, nr_effective,
+ have_cache, erasure_code, reserve, flags);
+ if (ret)
+ return ret;
+
if (erasure_code) {
- if (!ec_open_bucket(c, ptrs)) {
- get_buckets_from_writepoint(c, ptrs, wp, &devs,
- nr_replicas, nr_effective,
- have_cache, flags, true);
- if (*nr_effective >= nr_replicas)
- return 0;
+ ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
+ target,
+ nr_replicas, nr_effective,
+ have_cache, flags, _cl);
+ } else {
+retry_blocking:
+ /*
+ * Try nonblocking first, so that if one device is full we'll try from
+ * other devices:
+ */
+ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+ nr_replicas, nr_effective, have_cache,
+ wp->data_type, reserve, cl);
+ if (ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+ !cl && _cl) {
+ cl = _cl;
+ goto retry_blocking;
}
- if (!ec_open_bucket(c, ptrs)) {
- ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
- target, erasure_code,
- nr_replicas, nr_effective,
- have_cache, flags, _cl);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
- bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
- bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
- return ret;
- if (*nr_effective >= nr_replicas)
- return 0;
- }
}
- get_buckets_from_writepoint(c, ptrs, wp, &devs,
- nr_replicas, nr_effective,
- have_cache, flags, false);
- if (*nr_effective >= nr_replicas)
- return 0;
+ return ret;
+}
-retry_blocking:
- /*
- * Try nonblocking first, so that if one device is full we'll try from
- * other devices:
- */
- ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+static int open_bucket_add_buckets(struct btree_trans *trans,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_list *devs_have,
+ u16 target,
+ unsigned erasure_code,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum alloc_reserve reserve,
+ unsigned flags,
+ struct closure *cl)
+{
+ int ret;
+
+ if (erasure_code) {
+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
+ devs_have, target, erasure_code,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
- if (ret &&
- !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
- !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
- !cl && _cl) {
- cl = _cl;
- goto retry_blocking;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+ return ret;
+ if (*nr_effective >= nr_replicas)
+ return 0;
}
- return ret;
+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
+ devs_have, target, false,
+ nr_replicas, nr_effective, have_cache,
+ reserve, flags, cl);
+ return ret < 0 ? ret : 0;
}
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
@@ -1159,14 +1213,10 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
struct open_bucket *ob;
struct open_buckets ptrs;
unsigned nr_effective, write_points_nr;
- unsigned ob_flags = 0;
bool have_cache;
int ret;
int i;
- if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
- ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
-
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
ptrs.nr = 0;
@@ -1176,9 +1226,6 @@ retry:
*wp_ret = wp = writepoint_find(trans, write_point.v);
- if (wp->data_type == BCH_DATA_user)
- ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
-
/* metadata may not allocate on cache devices: */
if (wp->data_type != BCH_DATA_user)
have_cache = true;
@@ -1188,13 +1235,13 @@ retry:
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
- ob_flags, cl);
+ flags, cl);
} else {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
- ob_flags, NULL);
+ flags, NULL);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
@@ -1203,7 +1250,7 @@ retry:
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
- ob_flags, cl);
+ flags, cl);
}
alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas);
@@ -1350,6 +1397,24 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
}
}
+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ unsigned i;
+
+ spin_lock(&c->freelist_lock);
+ for (i = 0; i < c->open_buckets_partial_nr; i++) {
+ struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+ prt_printf(out, "%zu ref %u type %s ec %u %u:%llu:%u\n",
+ ob - c->open_buckets,
+ atomic_read(&ob->pin),
+ bch2_data_types[ob->data_type],
+ ob->ec != NULL,
+ ob->dev, ob->bucket, ob->gen);
+ }
+ spin_unlock(&c->freelist_lock);
+}
+
static const char * const bch2_write_point_states[] = {
#define x(n) #n,
WRITE_POINT_STATES()
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index ba7a87af..e9b3b142 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -31,8 +31,7 @@ void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
long bch2_bucket_alloc_new_fs(struct bch_dev *);
struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
- enum alloc_reserve, bool,
- struct closure *);
+ enum alloc_reserve, struct closure *);
static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
struct open_bucket *ob)
@@ -152,8 +151,9 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
- unsigned, unsigned *, bool *, enum alloc_reserve,
- unsigned, struct closure *);
+ unsigned, unsigned *, bool *,
+ enum bch_data_type, enum alloc_reserve,
+ struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
@@ -221,6 +221,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
void bch2_fs_allocator_foreground_init(struct bch_fs *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
+void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
index 2e6f4806..0739bf92 100644
--- a/libbcachefs/alloc_types.h
+++ b/libbcachefs/alloc_types.h
@@ -51,10 +51,9 @@ struct open_bucket {
* the block in the stripe this open_bucket corresponds to:
*/
u8 ec_idx;
- enum bch_data_type data_type:8;
+ enum bch_data_type data_type:6;
unsigned valid:1;
unsigned on_partial_list:1;
- unsigned alloc_reserve:3;
u8 dev;
u8 gen;
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index e001f419..a40c2612 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -932,11 +932,14 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bpos bucket,
u64 *bp_offset,
struct bbpos start,
- struct bbpos end)
+ struct bbpos end,
+ struct bpos *last_flushed_pos)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bch_backpointer bp;
struct bbpos pos;
+ struct bpos bp_pos;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -957,17 +960,31 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
- if (fsck_err_on(!k.k, trans->c,
+ bp_pos = bucket_pos_to_bp(c, bucket,
+ max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+
+ if (!k.k && !bpos_eq(*last_flushed_pos, bp_pos)) {
+ *last_flushed_pos = bp_pos;
+ pr_info("flushing at %llu:%llu",
+ last_flushed_pos->inode,
+ last_flushed_pos->offset);
+
+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ -BCH_ERR_transaction_restart_write_buffer_flush;
+ goto out;
+ }
+
+ if (fsck_err_on(!k.k, c,
"%s backpointer points to missing extent\n%s",
*bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
(bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
if (ret == -ENOENT)
- bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
+ bch_err(c, "backpointer at %llu not found", *bp_offset);
}
-
- bch2_trans_iter_exit(trans, &iter);
+out:
fsck_err:
+ bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
return ret;
}
@@ -978,6 +995,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
{
struct btree_iter iter;
struct bkey_s_c k;
+ struct bpos last_flushed_pos = SPOS_MAX;
int ret = 0;
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
@@ -987,7 +1005,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
while (!(ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
- check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
+ check_one_backpointer(trans, iter.pos, &bp_offset,
+ start, end, &last_flushed_pos))) &&
bp_offset < U64_MAX)
bp_offset++;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 7f9c1087..3f88e7ea 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -516,9 +516,6 @@ struct bch_dev {
unsigned nr_open_buckets;
unsigned nr_btree_reserve;
- open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
- open_bucket_idx_t open_buckets_partial_nr;
-
size_t inc_gen_needs_gc;
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
@@ -859,6 +856,9 @@ struct bch_fs {
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
+ open_bucket_idx_t open_buckets_partial_nr;
+
struct write_point btree_write_point;
struct write_point rebalance_write_point;
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index d5a9cfb0..c8b0cf5e 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2568,6 +2568,18 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
+{
+ struct bkey_s_c k;
+
+ while (btree_trans_too_many_iters(iter->trans) ||
+ (k = bch2_btree_iter_peek_type(iter, iter->flags),
+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
+ bch2_trans_begin(iter->trans);
+
+ return k;
+}
+
/* new transactional stuff: */
#ifdef CONFIG_BCACHEFS_DEBUG
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 1225c4dd..448be089 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -596,6 +596,8 @@ static inline int btree_trans_too_many_iters(struct btree_trans *trans)
return 0;
}
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
+
static inline struct bkey_s_c
__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 6285532e..026c249a 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -64,6 +64,15 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
(*fast)++;
+
+ if (path->ref > 1) {
+ /*
+ * We can't clone a path that has write locks: if the path is
+ * shared, unlock before set_pos(), traverse():
+ */
+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
+ *write_locked = false;
+ }
return 0;
trans_commit:
return bch2_trans_update(trans, iter, &wb->k, 0) ?:
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index de0575f6..e1467e11 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -98,8 +98,10 @@ static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr)
- if (ptr->dev == dev)
- ptr->cached = true;
+ if (ptr->dev == dev) {
+ bch2_extent_ptr_set_cached(k, ptr);
+ return;
+ }
}
static int __bch2_data_update_index_update(struct btree_trans *trans,
@@ -295,15 +297,7 @@ out:
int bch2_data_update_index_update(struct bch_write_op *op)
{
- struct bch_fs *c = op->c;
- struct btree_trans trans;
- int ret;
-
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- ret = __bch2_data_update_index_update(&trans, op);
- bch2_trans_exit(&trans);
-
- return ret;
+ return bch2_trans_run(op->c, __bch2_data_update_index_update(&trans, op));
}
void bch2_data_update_read_done(struct data_update *m,
@@ -326,8 +320,9 @@ void bch2_data_update_exit(struct data_update *update)
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr) {
- bch2_bucket_nocow_unlock(&c->nocow_locks,
- PTR_BUCKET_POS(c, ptr), 0);
+ if (c->opts.nocow_enabled)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, ptr), 0);
percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
}
@@ -487,23 +482,26 @@ int bch2_data_update_init(struct btree_trans *trans,
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
m->op.incompressible = true;
- if (ctxt) {
- move_ctxt_wait_event(ctxt, trans,
- (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
- PTR_BUCKET_POS(c, &p.ptr), 0)) ||
- !atomic_read(&ctxt->read_sectors));
-
- if (!locked)
- bch2_bucket_nocow_lock(&c->nocow_locks,
- PTR_BUCKET_POS(c, &p.ptr), 0);
- } else {
- if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
- PTR_BUCKET_POS(c, &p.ptr), 0)) {
- ret = -BCH_ERR_nocow_lock_blocked;
- goto err;
+ if (c->opts.nocow_enabled) {
+ if (ctxt) {
+ move_ctxt_wait_event(ctxt, trans,
+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0)) ||
+ !atomic_read(&ctxt->read_sectors));
+
+ if (!locked)
+ bch2_bucket_nocow_lock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+ } else {
+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0)) {
+ ret = -BCH_ERR_nocow_lock_blocked;
+ goto err;
+ }
}
+ ptrs_locked |= (1U << i);
}
- ptrs_locked |= (1U << i);
+
i++;
}
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 84d2a0c4..7bd68880 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -138,20 +138,28 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned i;
+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
s->algorithm,
le16_to_cpu(s->sectors),
- s->nr_blocks - s->nr_redundant,
+ nr_data,
s->nr_redundant,
s->csum_type,
1U << s->csum_granularity_bits);
- for (i = 0; i < s->nr_blocks; i++)
- prt_printf(out, " %u:%llu:%u", s->ptrs[i].dev,
- (u64) s->ptrs[i].offset,
- stripe_blockcount_get(s, i));
+ for (i = 0; i < s->nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = s->ptrs + i;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
+ if (i < nr_data)
+ prt_printf(out, "#%u", stripe_blockcount_get(s, i));
+ if (ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
}
/* returns blocknr in stripe that we matched: */
@@ -442,15 +450,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
percpu_ref_put(&ca->io_ref);
}
-static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
+static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
+ struct ec_stripe_buf *stripe)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes,
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
POS(0, idx), BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
@@ -462,11 +469,15 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip
}
bkey_reassemble(&stripe->key.k_i, k);
err:
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
+static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
+{
+ return bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe));
+}
+
/* recovery read path: */
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
{
@@ -865,25 +876,6 @@ err:
return ret;
}
-static void extent_stripe_ptr_add(struct bkey_s_extent e,
- struct ec_stripe_buf *s,
- struct bch_extent_ptr *ptr,
- unsigned block)
-{
- struct bch_extent_stripe_ptr *dst = (void *) ptr;
- union bch_extent_entry *end = extent_entry_last(e);
-
- memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst);
- e.k->u64s += sizeof(*dst) / sizeof(u64);
-
- *dst = (struct bch_extent_stripe_ptr) {
- .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
- .block = block,
- .redundancy = s->key.v.nr_redundant,
- .idx = s->key.k.p.offset,
- };
-}
-
static int ec_stripe_update_extent(struct btree_trans *trans,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
@@ -895,6 +887,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bkey_s_c k;
const struct bch_extent_ptr *ptr_c;
struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+ struct bch_extent_stripe_ptr stripe_ptr;
struct bkey_i *n;
int ret, dev, block;
@@ -933,16 +926,27 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
dev = s->key.v.ptrs[block].dev;
- n = bch2_bkey_make_mut(trans, k);
+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto out;
+ bkey_reassemble(n, k);
+
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
BUG_ON(!ec_ptr);
- extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block);
+ stripe_ptr = (struct bch_extent_stripe_ptr) {
+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
+ .block = block,
+ .redundancy = s->key.v.nr_redundant,
+ .idx = s->key.k.p.offset,
+ };
+
+ __extent_entry_insert(n,
+ (union bch_extent_entry *) ec_ptr,
+ (union bch_extent_entry *) &stripe_ptr);
ret = bch2_trans_update(trans, &iter, n, 0);
out:
@@ -999,6 +1003,35 @@ err:
return ret;
}
+static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
+ struct ec_stripe_new *s,
+ unsigned block,
+ struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ unsigned offset = ca->mi.bucket_size - ob->sectors_free;
+ int ret;
+
+ if (!bch2_dev_get_ioref(ca, WRITE)) {
+ s->err = -EROFS;
+ return;
+ }
+
+ memset(s->new_stripe.data[block] + (offset << 9),
+ 0,
+ ob->sectors_free << 9);
+
+ ret = blkdev_issue_zeroout(ca->disk_sb.bdev,
+ ob->bucket * ca->mi.bucket_size + offset,
+ ob->sectors_free,
+ GFP_KERNEL, 0);
+
+ percpu_ref_put(&ca->io_ref);
+
+ if (ret)
+ s->err = ret;
+}
+
/*
* data buckets of new stripe all written: create the stripe
*/
@@ -1014,6 +1047,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
closure_sync(&s->iodone);
+ for (i = 0; i < nr_data; i++)
+ if (s->blocks[i]) {
+ ob = c->open_buckets + s->blocks[i];
+
+ if (ob->sectors_free)
+ zero_out_rest_of_ec_bucket(c, s, i, ob);
+ }
+
if (s->err) {
if (!bch2_err_matches(s->err, EROFS))
bch_err(c, "error creating stripe: error writing data buckets");
@@ -1155,9 +1196,6 @@ void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
{
struct ec_stripe_new *s = ob->ec;
- if (ob->sectors_free)
- s->err = -1;
-
ec_stripe_new_put(c, s);
}
@@ -1398,10 +1436,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
h->s->nr_parity,
&nr_have_parity,
&have_cache,
+ BCH_DATA_parity,
h->copygc
? RESERVE_movinggc
: RESERVE_none,
- 0,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
@@ -1427,10 +1465,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
h->s->nr_data,
&nr_have_data,
&have_cache,
+ BCH_DATA_user,
h->copygc
? RESERVE_movinggc
: RESERVE_none,
- 0,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
@@ -1486,8 +1524,9 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret;
}
-static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *h)
+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
{
+ struct bch_fs *c = trans->c;
unsigned i;
s64 idx;
int ret;
@@ -1497,7 +1536,7 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *
return -BCH_ERR_ENOSPC_stripe_reuse;
h->s->have_existing_stripe = true;
- ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+ ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
if (ret) {
bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
return ret;
@@ -1626,7 +1665,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
goto err;
if (ret && needs_stripe_new)
- ret = __bch2_ec_stripe_head_reuse(c, h);
+ ret = __bch2_ec_stripe_head_reuse(trans, h);
if (ret) {
bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
goto err;
@@ -1771,6 +1810,7 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;
+ unsigned i;
while (1) {
mutex_lock(&c->ec_stripe_head_lock);
@@ -1782,7 +1822,12 @@ void bch2_fs_ec_exit(struct bch_fs *c)
if (!h)
break;
- BUG_ON(h->s);
+ if (h->s) {
+ for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++)
+ BUG_ON(h->s->blocks[i]);
+
+ kfree(h->s);
+ }
kfree(h);
}
@@ -1801,6 +1846,8 @@ void bch2_fs_ec_init_early(struct bch_fs *c)
int bch2_fs_ec_init(struct bch_fs *c)
{
+ spin_lock_init(&c->ec_stripes_new_lock);
+
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index d01cec89..4fc581be 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -706,18 +706,6 @@ void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry
k->k.u64s -= extent_entry_u64s(entry);
}
-static inline void __extent_entry_insert(struct bkey_i *k,
- union bch_extent_entry *dst,
- union bch_extent_entry *new)
-{
- union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
-
- memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
- dst, (u64 *) end - (u64 *) dst);
- k->k.u64s += extent_entry_u64s(new);
- memcpy_u64s_small(dst, new, extent_entry_u64s(new));
-}
-
void bch2_extent_ptr_decoded_append(struct bkey_i *k,
struct extent_ptr_decoded *p)
{
@@ -951,6 +939,29 @@ bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
return false;
}
+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry;
+ union bch_extent_entry *ec = NULL;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (&entry->ptr == ptr) {
+ ptr->cached = true;
+ if (ec)
+ extent_entry_drop(k, ec);
+ return;
+ }
+
+ if (extent_entry_is_stripe_ptr(entry))
+ ec = entry;
+ else if (extent_entry_is_ptr(entry))
+ ec = NULL;
+ }
+
+ BUG();
+}
+
/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
@@ -1094,7 +1105,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned size_ondisk = k.k->size;
unsigned nonce = UINT_MAX;
unsigned nr_ptrs = 0;
- bool unwritten = false;
+ bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
int ret;
if (bkey_is_btree_ptr(k.k))
@@ -1130,7 +1141,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}
+ if (entry->ptr.cached && have_ec) {
+ prt_printf(err, "cached, erasure coded ptr");
+ return -BCH_ERR_invalid_bkey;
+ }
+
unwritten = entry->ptr.unwritten;
+ have_ec = false;
+ crc_since_last_ptr = false;
nr_ptrs++;
break;
case BCH_EXTENT_ENTRY_crc32:
@@ -1164,17 +1182,43 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}
}
+
+ if (crc_since_last_ptr) {
+ prt_printf(err, "redundant crc entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+ crc_since_last_ptr = true;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
+ if (have_ec) {
+ prt_printf(err, "redundant stripe entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+ have_ec = true;
break;
}
}
+ if (!nr_ptrs) {
+ prt_str(err, "no ptrs");
+ return -BCH_ERR_invalid_bkey;
+ }
+
if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
prt_str(err, "too many ptrs");
return -BCH_ERR_invalid_bkey;
}
+ if (crc_since_last_ptr) {
+ prt_printf(err, "redundant crc entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+
+ if (have_ec) {
+ prt_printf(err, "redundant stripe entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+
return 0;
}
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 1d8f3b30..2e37543a 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -76,6 +76,18 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
return extent_entry_bytes(entry) / sizeof(u64);
}
+static inline void __extent_entry_insert(struct bkey_i *k,
+ union bch_extent_entry *dst,
+ union bch_extent_entry *new)
+{
+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+
+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
+ dst, (u64 *) end - (u64 *) dst);
+ k->k.u64s += extent_entry_u64s(new);
+ memcpy_u64s_small(dst, new, extent_entry_u64s(new));
+}
+
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
@@ -655,6 +667,8 @@ bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
+void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
+
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 64925db2..15ce0657 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -1650,7 +1650,7 @@ static void __bch2_write(struct bch_write_op *op)
nofs_flags = memalloc_nofs_save();
- if (unlikely(op->opts.nocow)) {
+ if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
bch2_nocow_write(op);
if (op->flags & BCH_WRITE_DONE)
goto out_nofs_restore;
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 957eeece..e0c4f51a 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -789,8 +789,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break;
}
} else {
- ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
- false, cl);
+ ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, cl);
ret = PTR_ERR_OR_ZERO(ob[nr_got]);
if (ret)
break;
diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c
index 29e51bde..cf5998e5 100644
--- a/libbcachefs/keylist.c
+++ b/libbcachefs/keylist.c
@@ -31,22 +31,6 @@ int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
return 0;
}
-void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
-{
- struct bkey_i *where;
-
- for_each_keylist_key(l, where)
- if (bpos_lt(insert->k.p, where->k.p))
- break;
-
- memmove_u64s_up((u64 *) where + insert->k.u64s,
- where,
- ((u64 *) l->top) - ((u64 *) where));
-
- l->top_p += insert->k.u64s;
- bkey_copy(where, insert);
-}
-
void bch2_keylist_pop_front(struct keylist *l)
{
l->top_p -= bch2_keylist_front(l)->k.u64s;
diff --git a/libbcachefs/keylist.h b/libbcachefs/keylist.h
index 635efb7e..fe759c70 100644
--- a/libbcachefs/keylist.h
+++ b/libbcachefs/keylist.h
@@ -5,7 +5,6 @@
#include "keylist_types.h"
int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
-void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *);
void bch2_keylist_pop_front(struct keylist *);
static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 9eec12a9..e913b90f 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -4,6 +4,7 @@
#include "alloc_background.h"
#include "btree_iter.h"
#include "btree_update.h"
+#include "btree_write_buffer.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@@ -101,7 +102,8 @@ static const char * const bch2_lru_types[] = {
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
- struct bkey_s_c lru_k)
+ struct bkey_s_c lru_k,
+ struct bpos *last_flushed_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -137,19 +139,25 @@ static int bch2_check_lru_key(struct btree_trans *trans,
break;
}
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
- lru_pos_time(lru_k.k->p) != idx, c,
- "incorrect lru entry: lru %s time %llu\n"
- " %s\n"
- " for %s",
- bch2_lru_types[type],
- lru_pos_time(lru_k.k->p),
- (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
- (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
- ret = bch2_btree_delete_at(trans, lru_iter, 0);
- if (ret)
- goto err;
+ if (lru_k.k->type != KEY_TYPE_set ||
+ lru_pos_time(lru_k.k->p) != idx) {
+ if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
+ *last_flushed_pos = lru_k.k->p;
+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ -BCH_ERR_transaction_restart_write_buffer_flush;
+ goto out;
+ }
+
+ if (fsck_err(c, "incorrect lru entry: lru %s time %llu\n"
+ " %s\n"
+ " for %s",
+ bch2_lru_types[type],
+ lru_pos_time(lru_k.k->p),
+ (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
+ (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
+ ret = bch2_btree_delete_at(trans, lru_iter, 0);
}
+out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
@@ -163,6 +171,7 @@ int bch2_check_lrus(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
+ struct bpos last_flushed_pos = POS_MIN;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@@ -170,7 +179,7 @@ int bch2_check_lrus(struct bch_fs *c)
ret = for_each_btree_key_commit(&trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
- bch2_check_lru_key(&trans, &iter, k));
+ bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos));
bch2_trans_exit(&trans);
return ret;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 7dac9264..4ef7595f 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -227,7 +227,8 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
if (bkey_deleted(&n->k))
n->k.size = 0;
- return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ return bch2_trans_relock(trans) ?:
+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 304718a0..76c2691a 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -404,6 +404,12 @@ enum opt_type {
NULL, "Nocow mode: Writes will be done in place when possible.\n"\
"Snapshots and reflink will still caused writes to be COW\n"\
"Implicitly disables data checksumming, compression and encryption")\
+ x(nocow_enabled, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Enable nocow mode: enables runtime locking in\n"\
+ "data move path needed if nocow will ever be in use\n")\
x(no_data_io, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index b981c87e..a7582dd4 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -194,6 +194,7 @@ read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
read_attribute(open_buckets);
+read_attribute(open_buckets_partial);
read_attribute(write_points);
read_attribute(nocow_lock_table);
@@ -455,6 +456,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
+ if (attr == &sysfs_open_buckets_partial)
+ bch2_open_buckets_partial_to_text(out, c);
+
if (attr == &sysfs_write_points)
bch2_write_points_to_text(out, c);
@@ -663,6 +667,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_stripes_heap,
&sysfs_open_buckets,
+ &sysfs_open_buckets_partial,
&sysfs_write_points,
#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,
diff --git a/linux/blkdev.c b/linux/blkdev.c
index 0a5cedfe..805d55db 100644
--- a/linux/blkdev.c
+++ b/linux/blkdev.c
@@ -118,6 +118,14 @@ int blkdev_issue_discard(struct block_device *bdev,
return 0;
}
+int blkdev_issue_zeroout(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask, unsigned flags)
+{
+ /* Not yet implemented: */
+ BUG();
+}
+
unsigned bdev_logical_block_size(struct block_device *bdev)
{
struct stat statbuf;