about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2023-11-25 21:51:30 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-11-25 21:51:30 -0500
commit 3a0cc86e767b95366b9cbdba7a1453454f1b5a41 (patch)
tree 56fc9015ed1c97b3a98d7592ad81a9d40a8d3f9b
parent 138397d89212cd0b5abdbfdd644dc7702ea00f4a (diff)
download bcachefs-tools-3a0cc86e767b95366b9cbdba7a1453454f1b5a41.tar.gz
Update bcachefs sources to 8c94740b1bf8 bcachefs: Add missing vaidation for jset_entry_data_usage
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs/alloc_background.c 120
-rw-r--r-- libbcachefs/alloc_background.h 23
-rw-r--r-- libbcachefs/alloc_foreground.c 3
-rw-r--r-- libbcachefs/bcachefs.h 2
-rw-r--r-- libbcachefs/bcachefs_format.h 10
-rw-r--r-- libbcachefs/bcachefs_ioctl.h 17
-rw-r--r-- libbcachefs/btree_gc.c 3
-rw-r--r-- libbcachefs/btree_trans_commit.c 3
-rw-r--r-- libbcachefs/btree_write_buffer.c 16
-rw-r--r-- libbcachefs/buckets.c 83
-rw-r--r-- libbcachefs/buckets.h 1
-rw-r--r-- libbcachefs/buckets_types.h 2
-rw-r--r-- libbcachefs/chardev.c 84
-rw-r--r-- libbcachefs/compress.c 16
-rw-r--r-- libbcachefs/data_update.c 106
-rw-r--r-- libbcachefs/data_update.h 9
-rw-r--r-- libbcachefs/errcode.h 3
-rw-r--r-- libbcachefs/extents.c 30
-rw-r--r-- libbcachefs/io_read.c 2
-rw-r--r-- libbcachefs/journal_io.c 14
-rw-r--r-- libbcachefs/lru.c 61
-rw-r--r-- libbcachefs/lru.h 2
-rw-r--r-- libbcachefs/move.c 78
-rw-r--r-- libbcachefs/move.h 19
-rw-r--r-- libbcachefs/movinggc.c 13
-rw-r--r-- libbcachefs/rebalance.c 14
-rw-r--r-- libbcachefs/recovery.c 2
-rw-r--r-- libbcachefs/replicas.c 69
-rw-r--r-- libbcachefs/replicas.h 2
-rw-r--r-- libbcachefs/sb-clean.c 1
-rw-r--r-- libbcachefs/sb-members.c 5
-rw-r--r-- libbcachefs/super-io.c 2
-rw-r--r-- libbcachefs/super.c 42
-rw-r--r-- libbcachefs/sysfs.c 143
-rw-r--r-- libbcachefs/trace.h 34
36 files changed, 610 insertions, 426 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 4649f2ba..97936a15 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-783085c3cc440183ba5e987b1aa7791cc1ca42ba
+8c94740b1bf8645d3398170f41c9c88b78332252
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 1ed8506c..56a18ace 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -261,10 +261,8 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
- bkey_fsck_err_on(a.v->dirty_sectors ||
- a.v->cached_sectors ||
- a.v->stripe, c, err,
- alloc_key_empty_but_have_data,
+ bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
+ c, err, alloc_key_empty_but_have_data,
"empty data type free but have data");
break;
case BCH_DATA_sb:
@@ -272,22 +270,21 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
- bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
- alloc_key_dirty_sectors_0,
+ bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
+ c, err, alloc_key_dirty_sectors_0,
"data_type %s but dirty_sectors==0",
bch2_data_types[a.v->data_type]);
break;
case BCH_DATA_cached:
bkey_fsck_err_on(!a.v->cached_sectors ||
- a.v->dirty_sectors ||
- a.v->stripe, c, err,
- alloc_key_cached_inconsistency,
+ bch2_bucket_sectors_dirty(*a.v) ||
+ a.v->stripe,
+ c, err, alloc_key_cached_inconsistency,
"data type inconsistency");
bkey_fsck_err_on(!a.v->io_time[READ] &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
- c, err,
- alloc_key_cached_but_read_time_zero,
+ c, err, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time == 0");
break;
case BCH_DATA_stripe:
@@ -790,8 +787,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
- if (new_a->dirty_sectors > old_a->dirty_sectors ||
- new_a->cached_sectors > old_a->cached_sectors) {
+ if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
@@ -1509,6 +1505,27 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (a->data_type != BCH_DATA_cached)
return 0;
+ if (fsck_err_on(!a->io_time[READ], c,
+ alloc_key_cached_but_read_time_zero,
+ "cached bucket with read_time 0\n"
+ " %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ struct bkey_i_alloc_v4 *a_mut =
+ bch2_alloc_to_v4_mut(trans, alloc_k);
+ ret = PTR_ERR_OR_ZERO(a_mut);
+ if (ret)
+ goto err;
+
+ a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+ ret = bch2_trans_update(trans, alloc_iter,
+ &a_mut->k_i, BTREE_TRIGGER_NORUN);
+ if (ret)
+ goto err;
+
+ a = &a_mut->v;
+ }
+
lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
lru_pos(alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
@@ -1517,41 +1534,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
return ret;
- if (fsck_err_on(!a->io_time[READ], c,
- alloc_key_cached_but_read_time_zero,
- "cached bucket with read_time 0\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
- fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
+ if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
alloc_key_to_missing_lru_entry,
"missing lru entry\n"
" %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- u64 read_time = a->io_time[READ] ?:
- atomic64_read(&c->io_clock[READ].now);
-
ret = bch2_lru_set(trans,
alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
- read_time);
+ a->io_time[READ]);
if (ret)
goto err;
-
- if (a->io_time[READ] != read_time) {
- struct bkey_i_alloc_v4 *a_mut =
- bch2_alloc_to_v4_mut(trans, alloc_k);
- ret = PTR_ERR_OR_ZERO(a_mut);
- if (ret)
- goto err;
-
- a_mut->v.io_time[READ] = read_time;
- ret = bch2_trans_update(trans, alloc_iter,
- &a_mut->k_i, BTREE_TRIGGER_NORUN);
- if (ret)
- goto err;
- }
}
err:
fsck_err:
@@ -1564,15 +1558,13 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
struct btree_iter iter;
struct bkey_s_c k;
- int ret = 0;
- ret = bch2_trans_run(c,
+ int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
bch2_check_alloc_to_lru_ref(trans, &iter)));
- if (ret)
- bch_err_fn(c, ret);
+ bch_err_fn(c, ret);
return ret;
}
@@ -1734,28 +1726,25 @@ void bch2_do_discards(struct bch_fs *c)
static int invalidate_one_bucket(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
+ struct bpos *last_flushed_pos,
s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
- struct btree_iter alloc_iter = { NULL };
- struct bkey_i_alloc_v4 *a = NULL;
- struct printbuf buf = PRINTBUF;
- struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
- unsigned cached_sectors;
int ret = 0;
if (*nr_to_invalidate <= 0)
return 1;
- if (!bch2_dev_bucket_exists(c, bucket)) {
- prt_str(&buf, "lru entry points to invalid bucket");
- goto err;
- }
+ ret = bch2_check_lru_key(trans, lru_iter, lru_k, last_flushed_pos);
+ if (ret)
+ return ret < 0 ? ret : 0;
+ struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
return 0;
- a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
+ struct btree_iter alloc_iter;
+ struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
@@ -1769,7 +1758,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");
- cached_sectors = a->v.cached_sectors;
+ unsigned cached_sectors = a->v.cached_sectors;
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
a->v.gen++;
@@ -1791,28 +1780,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
--*nr_to_invalidate;
out:
bch2_trans_iter_exit(trans, &alloc_iter);
- printbuf_exit(&buf);
return ret;
-err:
- prt_str(&buf, "\n lru key: ");
- bch2_bkey_val_to_text(&buf, c, lru_k);
-
- prt_str(&buf, "\n lru entry: ");
- bch2_lru_pos_to_text(&buf, lru_iter->pos);
-
- prt_str(&buf, "\n alloc key: ");
- if (!a)
- bch2_bpos_to_text(&buf, bucket);
- else
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
-
- bch_err(c, "%s", buf.buf);
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
- bch2_inconsistent_error(c);
- ret = -EINVAL;
- }
-
- goto out;
}
static void bch2_do_invalidates_work(struct work_struct *work)
@@ -1822,6 +1790,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
+ struct bpos last_flushed_pos = POS_MIN;
unsigned i;
int ret = 0;
@@ -1837,7 +1806,8 @@ static void bch2_do_invalidates_work(struct work_struct *work)
lru_pos(ca->dev_idx, 0, 0),
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
BTREE_ITER_INTENT, k,
- invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+ invalidate_one_bucket(trans, &iter, k, &last_flushed_pos,
+ &nr_to_invalidate));
if (ret < 0) {
percpu_ref_put(&ca->ref);
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 73faf99a..72bb8491 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
}
+static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
+{
+ return a.dirty_sectors + a.cached_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
+{
+ return a.dirty_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
+ struct bch_alloc_v4 a)
+{
+ unsigned d = bch2_bucket_sectors_dirty(a);
+
+ return d ? max(0U, ca->mi.bucket_size - d) : 0;
+}
+
static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca)
{
if (!data_type_movable(a.data_type) ||
- a.dirty_sectors >= ca->mi.bucket_size)
+ !bch2_bucket_sectors_fragmented(ca, a))
return 0;
- return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
+ u64 d = bch2_bucket_sectors_dirty(a);
+ return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}
static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index eef6fa8d..1ba0eeb7 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1345,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
int ret;
int i;
+ if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+ erasure_code = false;
+
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
BUG_ON(!nr_replicas || !nr_replicas_required);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 53f93f03..295efeda 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -935,7 +935,7 @@ struct bch_fs {
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
mempool_t decompress_workspace;
- ZSTD_parameters zstd_params;
+ size_t zstd_workspace_size;
struct crypto_shash *sha256;
struct crypto_sync_skcipher *chacha20;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index ad0f298c..96778007 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -151,7 +151,11 @@ struct bpos {
#else
#error edit for your odd byteorder.
#endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
@@ -2203,8 +2207,8 @@ struct jset_entry_dev_usage {
__le32 dev;
__u32 pad;
- __le64 buckets_ec;
- __le64 _buckets_unavailable; /* No longer used */
+ __le64 _buckets_ec; /* No longer used */
+ __le64 _buckets_unavailable; /* No longer used */
struct jset_entry_dev_usage_type d[];
};
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
index 18eb3254..44ba7a87 100644
--- a/libbcachefs/bcachefs_ioctl.h
+++ b/libbcachefs/bcachefs_ioctl.h
@@ -81,6 +81,8 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
+#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
+
/* ioctl below act on a particular file, not the filesystem as a whole: */
#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
@@ -298,7 +300,20 @@ struct bch_ioctl_dev_usage {
__u64 buckets;
__u64 sectors;
__u64 fragmented;
- } d[BCH_DATA_NR];
+ } d[10];
+};
+
+struct bch_ioctl_dev_usage_v2 {
+ __u64 dev;
+ __u32 flags;
+ __u8 state;
+ __u8 nr_data_types;
+ __u8 pad[6];
+
+ __u32 bucket_size;
+ __u64 nr_buckets;
+
+ struct bch_ioctl_dev_usage_type d[0];
};
/*
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 7e5d52f8..90f5bcfa 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -1254,9 +1254,6 @@ static int bch2_gc_done(struct bch_fs *c,
copy_dev_field(dev_usage_fragmented_wrong,
d[i].fragmented, "%s fragmented", bch2_data_types[i]);
}
-
- copy_dev_field(dev_usage_buckets_ec_wrong,
- buckets_ec, "buckets_ec");
}
{
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 09e94cc4..7210d5c2 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -361,7 +361,6 @@ noinline static int
btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
struct btree_path *path, unsigned new_u64s)
{
- struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct bkey_cached *ck = (void *) path->l[0].b;
struct bkey_i *new_k;
@@ -372,7 +371,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
if (!new_k) {
- bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_id_str(path->btree_id), new_u64s);
return -BCH_ERR_ENOMEM_btree_key_cache_insert;
}
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 0c2db1fa..d3c38d2c 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -29,14 +29,12 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re
#ifdef CONFIG_X86_64
int cmp;
- asm(".intel_syntax noprefix;"
- "mov rax, [%[l]];"
- "sub rax, [%[r]];"
- "mov rax, [%[l] + 8];"
- "sbb rax, [%[r] + 8];"
- "mov rax, [%[l] + 16];"
- "sbb rax, [%[r] + 16];"
- ".att_syntax prefix;"
+ asm("mov (%[l]), %%rax;"
+ "sub (%[r]), %%rax;"
+ "mov 8(%[l]), %%rax;"
+ "sbb 8(%[r]), %%rax;"
+ "mov 16(%[l]), %%rax;"
+ "sbb 16(%[r]), %%rax;"
: "=@ccae" (cmp)
: [l] "r" (l), [r] "r" (r)
: "rax", "cc");
@@ -297,7 +295,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
skipped++;
- n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);;
+ n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
k->journal_seq = 0;
continue;
}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 5bfa102a..50eb6ba2 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -277,12 +277,28 @@ void bch2_dev_usage_init(struct bch_dev *ca)
ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
}
-static inline int bucket_sectors_fragmented(struct bch_dev *ca,
- struct bch_alloc_v4 a)
+void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
{
- return a.dirty_sectors
- ? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
- : 0;
+ prt_tab(out);
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+ prt_str(out, "sectors");
+ prt_tab_rjust(out);
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++) {
+ prt_str(out, bch2_data_types[i]);
+ prt_tab(out);
+ prt_u64(out, usage->d[i].buckets);
+ prt_tab_rjust(out);
+ prt_u64(out, usage->d[i].sectors);
+ prt_tab_rjust(out);
+ prt_u64(out, usage->d[i].fragmented);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
}
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
@@ -306,41 +322,37 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
u->d[old.data_type].buckets--;
u->d[new.data_type].buckets++;
- u->buckets_ec -= (int) !!old.stripe;
- u->buckets_ec += (int) !!new.stripe;
-
- u->d[old.data_type].sectors -= old.dirty_sectors;
- u->d[new.data_type].sectors += new.dirty_sectors;
+ u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
+ u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
u->d[BCH_DATA_cached].sectors += new.cached_sectors;
u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
- u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
- u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
+ u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
+ u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
preempt_enable();
}
+struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
+{
+ return (struct bch_alloc_v4) {
+ .gen = b.gen,
+ .data_type = b.data_type,
+ .dirty_sectors = b.dirty_sectors,
+ .cached_sectors = b.cached_sectors,
+ .stripe = b.stripe,
+ };
+}
+
static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
struct bucket old, struct bucket new,
u64 journal_seq, bool gc)
{
- struct bch_alloc_v4 old_a = {
- .gen = old.gen,
- .data_type = old.data_type,
- .dirty_sectors = old.dirty_sectors,
- .cached_sectors = old.cached_sectors,
- .stripe = old.stripe,
- };
- struct bch_alloc_v4 new_a = {
- .gen = new.gen,
- .data_type = new.data_type,
- .dirty_sectors = new.dirty_sectors,
- .cached_sectors = new.cached_sectors,
- .stripe = new.stripe,
- };
-
- bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
+ bch2_dev_usage_update(c, ca,
+ bucket_m_to_alloc(old),
+ bucket_m_to_alloc(new),
+ journal_seq, gc);
}
static inline int __update_replicas(struct bch_fs *c,
@@ -640,7 +652,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
goto err;
}
-
g->data_type = data_type;
g->dirty_sectors += sectors;
new = *g;
@@ -657,14 +668,11 @@ static int check_bucket_ref(struct btree_trans *trans,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 b_gen, u8 bucket_data_type,
- u32 dirty_sectors, u32 cached_sectors)
+ u32 bucket_sectors)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
- u32 bucket_sectors = !ptr->cached
- ? dirty_sectors
- : cached_sectors;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -799,7 +807,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
g->gen, g->data_type,
- g->dirty_sectors, g->cached_sectors);
+ g->dirty_sectors);
if (ret)
goto err;
@@ -829,8 +837,7 @@ static int __mark_pointer(struct btree_trans *trans,
? dirty_sectors
: cached_sectors;
int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
- bucket_gen, *bucket_data_type,
- *dirty_sectors, *cached_sectors);
+ bucket_gen, *bucket_data_type, *dst_sectors);
if (ret)
return ret;
@@ -1559,7 +1566,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
a->v.gen, a->v.data_type,
- a->v.dirty_sectors, a->v.cached_sectors);
+ a->v.dirty_sectors);
if (ret)
goto err;
@@ -2073,8 +2080,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bucket_gens->first_bucket = ca->mi.first_bucket;
bucket_gens->nbuckets = nbuckets;
- bch2_copygc_stop(c);
-
if (resize) {
down_write(&c->gc_lock);
down_write(&ca->bucket_lock);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 5574b62e..bc088673 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -203,6 +203,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
}
void bch2_dev_usage_init(struct bch_dev *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
{
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index 2a9dab90..783f7101 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -33,8 +33,6 @@ struct bucket_gens {
};
struct bch_dev_usage {
- u64 buckets_ec;
-
struct {
u64 buckets;
u64 sectors; /* _compressed_ sectors: */
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index de3d82de..118f0c0c 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -23,6 +23,12 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
+__must_check
+static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
+{
+ return copy_to_user(to, from, n) ? -EFAULT : 0;
+}
+
/* returns with ref on ca->ref */
static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
unsigned flags)
@@ -149,10 +155,8 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
static long bch2_ioctl_query_uuid(struct bch_fs *c,
struct bch_ioctl_query_uuid __user *user_arg)
{
- if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
- sizeof(c->sb.user_uuid)))
- return -EFAULT;
- return 0;
+ return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
+ sizeof(c->sb.user_uuid));
}
#if 0
@@ -341,10 +345,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
if (len < sizeof(e))
return -EINVAL;
- if (copy_to_user(buf, &e, sizeof(e)))
- return -EFAULT;
-
- return sizeof(e);
+ return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
}
static const struct file_operations bcachefs_data_ops = {
@@ -474,14 +475,15 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
if (ret)
goto err;
- if (copy_to_user(user_arg, arg,
- sizeof(*arg) + arg->replica_entries_bytes))
- ret = -EFAULT;
+
+ ret = copy_to_user_errcode(user_arg, arg,
+ sizeof(*arg) + arg->replica_entries_bytes);
err:
kfree(arg);
return ret;
}
+/* obsolete, didn't allow for new data types: */
static long bch2_ioctl_dev_usage(struct bch_fs *c,
struct bch_ioctl_dev_usage __user *user_arg)
{
@@ -511,7 +513,6 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
- arg.buckets_ec = src.buckets_ec;
for (i = 0; i < BCH_DATA_NR; i++) {
arg.d[i].buckets = src.d[i].buckets;
@@ -521,10 +522,58 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
percpu_ref_put(&ca->ref);
- if (copy_to_user(user_arg, &arg, sizeof(arg)))
+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+}
+
+static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
+ struct bch_ioctl_dev_usage_v2 __user *user_arg)
+{
+ struct bch_ioctl_dev_usage_v2 arg;
+ struct bch_dev_usage src;
+ struct bch_dev *ca;
+ int ret = 0;
+
+ if (!test_bit(BCH_FS_STARTED, &c->flags))
+ return -EINVAL;
+
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
- return 0;
+ if ((arg.flags & ~BCH_BY_INDEX) ||
+ arg.pad[0] ||
+ arg.pad[1] ||
+ arg.pad[2])
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ src = bch2_dev_usage_read(ca);
+
+ arg.state = ca->mi.state;
+ arg.bucket_size = ca->mi.bucket_size;
+ arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
+
+ ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+ if (ret)
+ goto err;
+
+ for (unsigned i = 0; i < arg.nr_data_types; i++) {
+ struct bch_ioctl_dev_usage_type t = {
+ .buckets = src.d[i].buckets,
+ .sectors = src.d[i].sectors,
+ .fragmented = src.d[i].fragmented,
+ };
+
+ ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
+ if (ret)
+ goto err;
+ }
+err:
+ percpu_ref_put(&ca->ref);
+ return ret;
}
static long bch2_ioctl_read_super(struct bch_fs *c,
@@ -561,9 +610,8 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
goto err;
}
- if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
- vstruct_bytes(sb)))
- ret = -EFAULT;
+ ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
+ vstruct_bytes(sb));
err:
if (!IS_ERR_OR_NULL(ca))
percpu_ref_put(&ca->ref);
@@ -663,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
return bch2_ioctl_fs_usage(c, arg);
case BCH_IOCTL_DEV_USAGE:
return bch2_ioctl_dev_usage(c, arg);
+ case BCH_IOCTL_DEV_USAGE_V2:
+ return bch2_ioctl_dev_usage_v2(c, arg);
#if 0
case BCH_IOCTL_START:
BCH_IOCTL(start, struct bch_ioctl_start);
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index a8b148ec..64bdafe3 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
*/
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
- ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
- zstd_cctx_workspace_bound(&params.cParams));
+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
/*
* ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
size_t len = zstd_compress_cctx(ctx,
dst + 4, dst_len - 4 - 7,
src, src_len,
- &c->zstd_params);
+ &params);
if (zstd_is_error(len))
return 0;
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
size_t decompress_workspace_size = 0;
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
+
+ /*
+ * ZSTD is lying: if we allocate the size of the workspace it says it
+ * requires, it returns memory allocation errors
+ */
+ c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams) * 2;
+
struct {
unsigned feature;
enum bch_compression_type type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
- zstd_cctx_workspace_bound(&params.cParams),
+ c->zstd_workspace_size,
zstd_dctx_workspace_bound() },
}, *i;
bool have_compressed = false;
- c->zstd_params = params;
-
for (i = compression_types;
i < compression_types + ARRAY_SIZE(compression_types);
i++)
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 55769d77..31090796 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -267,6 +267,20 @@ restart_drop_extra_replicas:
goto out;
}
+ if (trace_data_update_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ trace_data_update(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
@@ -356,7 +370,7 @@ void bch2_data_update_exit(struct data_update *update)
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
}
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
struct data_update *update)
{
struct bch_fs *c = update->op.c;
@@ -436,7 +450,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct data_update_opts data_opts)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+
+ n = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ while (data_opts.kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+ data_opts.kill_ptrs ^= 1U << drop;
+ }
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k))
+ n->k.size = 0;
+
+ return bch2_trans_relock(trans) ?:
+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
int bch2_data_update_init(struct btree_trans *trans,
+ struct btree_iter *iter,
struct moving_context *ctxt,
struct data_update *m,
struct write_point_specifier wp,
@@ -452,7 +510,7 @@ int bch2_data_update_init(struct btree_trans *trans,
const struct bch_extent_ptr *ptr;
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
unsigned ptrs_locked = 0;
- int ret;
+ int ret = 0;
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +536,8 @@ int bch2_data_update_init(struct btree_trans *trans,
bkey_for_each_ptr(ptrs, ptr)
percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+ unsigned durability_have = 0, durability_removing = 0;
+
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
bool locked;
@@ -489,8 +549,11 @@ int bch2_data_update_init(struct btree_trans *trans,
reserve_sectors += k.k->size;
m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
- } else if (!p.ptr.cached) {
+ durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+ } else if (!p.ptr.cached &&
+ !((1U << i) & m->data_opts.kill_ptrs)) {
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ durability_have += bch2_extent_ptr_durability(c, &p);
}
/*
@@ -529,6 +592,29 @@ int bch2_data_update_init(struct btree_trans *trans,
i++;
}
+ /*
+ * If current extent durability is less than io_opts.data_replicas,
+ * we're not trying to rereplicate the extent up to data_replicas here -
+ * unless extra_replicas was specified
+ *
+ * Increasing replication is an explicit operation triggered by
+ * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+ */
+ if (durability_have >= io_opts.data_replicas) {
+ m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+ m->data_opts.rewrite_ptrs = 0;
+ /* if iter == NULL, it's just a promote */
+ if (iter)
+ ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ goto done;
+ }
+
+ m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->data_opts.extra_replicas;
+ m->op.nr_replicas_required = m->op.nr_replicas;
+
+ BUG_ON(!m->op.nr_replicas);
+
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
m->data_opts.extra_replicas
@@ -538,14 +624,11 @@ int bch2_data_update_init(struct btree_trans *trans,
goto err;
}
- m->op.nr_replicas += m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
-
- BUG_ON(!m->op.nr_replicas);
+ if (bkey_extent_is_unwritten(k)) {
+ bch2_update_unwritten_extent(trans, m);
+ goto done;
+ }
- /* Special handling required: */
- if (bkey_extent_is_unwritten(k))
- return -BCH_ERR_unwritten_extent_update;
return 0;
err:
i = 0;
@@ -560,6 +643,9 @@ err:
bch2_bkey_buf_exit(&m->k, c);
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
return ret;
+done:
+ bch2_data_update_exit(m);
+ return ret ?: -BCH_ERR_data_update_done;
}
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 9dc17b9d..991095bb 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
struct bch_extent_crc_unpacked);
+int bch2_extent_drop_ptrs(struct btree_trans *,
+ struct btree_iter *,
+ struct bkey_s_c,
+ struct data_update_opts);
+
void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+ struct moving_context *,
struct data_update *,
struct write_point_specifier,
struct bch_io_opts, struct data_update_opts,
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index e42b4529..4d35e5c6 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -160,7 +160,7 @@
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
x(0, restart_recovery) \
- x(0, unwritten_extent_update) \
+ x(0, data_update_done) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
x(EINVAL, mismatched_block_size) \
@@ -208,6 +208,7 @@
x(BCH_ERR_invalid_sb, invalid_sb_members) \
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
+ x(BCH_ERR_invalid_sb, invalid_replicas_entry) \
x(BCH_ERR_invalid_sb, invalid_sb_journal) \
x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a864de23..f6c92df5 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
return replicas;
}
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
{
- struct bch_dev *ca;
-
if (p->ptr.cached)
return 0;
- ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
- return ca->mi.durability +
- (p->has_ec
- ? p->ec.redundancy
- : 0);
+ return p->has_ec
+ ? p->ec.redundancy + 1
+ : ca->mi.durability;
}
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
{
- struct bch_dev *ca;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
- if (p->ptr.cached)
- return 0;
+ return __extent_ptr_durability(ca, p);
+}
- ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
if (ca->mi.state == BCH_MEMBER_STATE_failed)
return 0;
- return ca->mi.durability +
- (p->has_ec
- ? p->ec.redundancy
- : 0);
+ return __extent_ptr_durability(ca, p);
}
unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index b833409c..3281c4dd 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
bio = &op->write.op.wbio.bio;
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
- ret = bch2_data_update_init(trans, NULL, &op->write,
+ ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
opts,
(struct data_update_opts) {
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 4ec5d5d3..c2a65523 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -548,6 +548,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ struct printbuf err = PRINTBUF;
int ret = 0;
if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -556,10 +557,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: bad size")) {
journal_entry_null_range(entry, vstruct_next(entry));
- return ret;
+ goto out;
}
+ if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+ c, version, jset, entry,
+ journal_entry_data_usage_bad_size,
+ "invalid journal entry usage: %s", err.buf)) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ goto out;
+ }
+out:
fsck_err:
+ printbuf_exit(&err);
return ret;
}
@@ -676,8 +686,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
le64_to_cpu(u->d[i].sectors),
le64_to_cpu(u->d[i].fragmented));
}
-
- prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
}
static int journal_entry_log_validate(struct bch_fs *c,
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index e6d081c0..5340f2d0 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -40,8 +40,8 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
u64_to_bucket(lru.offset).offset);
}
-static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
- u64 dev_bucket, u64 time, bool set)
+static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+ u64 dev_bucket, u64 time, bool set)
{
return time
? bch2_btree_bit_mod(trans, BTREE_ID_lru,
@@ -51,12 +51,12 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, false);
}
int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
{
- return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
}
int bch2_lru_change(struct btree_trans *trans,
@@ -66,8 +66,8 @@ int bch2_lru_change(struct btree_trans *trans,
if (old_time == new_time)
return 0;
- return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
- bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
+ __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
}
static const char * const bch2_lru_types[] = {
@@ -77,10 +77,11 @@ static const char * const bch2_lru_types[] = {
NULL
};
-static int bch2_check_lru_key(struct btree_trans *trans,
- struct btree_iter *lru_iter,
- struct bkey_s_c lru_k,
- struct bpos *last_flushed_pos)
+/* Returns 1 if key has been deleted */
+int bch2_check_lru_key(struct btree_trans *trans,
+ struct btree_iter *lru_iter,
+ struct bkey_s_c lru_k,
+ struct bpos *last_flushed_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -89,7 +90,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
const struct bch_alloc_v4 *a;
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
- enum bch_lru_type type = lru_type(lru_k);
struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
u64 idx;
int ret;
@@ -98,7 +98,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
lru_entry_to_invalid_bucket,
"lru key points to nonexistent device:bucket %llu:%llu",
alloc_pos.inode, alloc_pos.offset))
- return bch2_btree_delete_at(trans, lru_iter, 0);
+ goto delete;
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
ret = bkey_err(k);
@@ -107,6 +107,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &a_convert);
+ enum bch_lru_type type = lru_type(lru_k);
switch (type) {
case BCH_LRU_read:
idx = alloc_lru_idx_read(*a);
@@ -114,6 +115,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
case BCH_LRU_fragmentation:
idx = a->fragmentation_lru;
break;
+ default:
+ /* unknown LRU type, don't check: */
+ goto out;
}
if (lru_k.k->type != KEY_TYPE_set ||
@@ -125,16 +129,18 @@ static int bch2_check_lru_key(struct btree_trans *trans,
goto out;
}
- if (c->opts.reconstruct_alloc ||
+ if ((c->opts.reconstruct_alloc &&
+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_lrus) ||
fsck_err(c, lru_entry_bad,
"incorrect lru entry: lru %s time %llu\n"
" %s\n"
- " for %s",
+ "for\n"
+ " %s",
bch2_lru_types[type],
lru_pos_time(lru_k.k->p),
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
- ret = bch2_btree_delete_at(trans, lru_iter, 0);
+ goto delete;
}
out:
err:
@@ -143,6 +149,14 @@ fsck_err:
printbuf_exit(&buf2);
printbuf_exit(&buf1);
return ret;
+delete:
+ ret = bch2_btree_delete_at(trans, lru_iter, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_lazy_rw|
+ BCH_TRANS_COMMIT_no_enospc) ?:
+ 1;
+ goto out;
}
int bch2_check_lrus(struct bch_fs *c)
@@ -150,15 +164,14 @@ int bch2_check_lrus(struct bch_fs *c)
struct btree_iter iter;
struct bkey_s_c k;
struct bpos last_flushed_pos = POS_MIN;
- int ret = 0;
- ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter,
- BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
- bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
- if (ret)
- bch_err_fn(c, ret);
- return ret;
+ int ret = bch2_trans_run(c,
+ for_each_btree_key2(trans, iter,
+ BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ({
+ int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
+ ret2 < 0 ? ret2 : 0;
+ })));
+ bch_err_fn(c, ret);
+ return ret;
}
diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h
index 429dca81..014dba7c 100644
--- a/libbcachefs/lru.h
+++ b/libbcachefs/lru.h
@@ -64,6 +64,8 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+int bch2_check_lru_key(struct btree_trans *, struct btree_iter *,
+ struct bkey_s_c, struct bpos *);
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index cf36f2b0..c5518a86 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -173,6 +173,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
{
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+ bch2_trans_unlock_long(ctxt->trans);
closure_sync(&ctxt->cl);
}
@@ -235,49 +236,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
scnprintf(stats->name, sizeof(stats->name), "%s", name);
}
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k,
- struct data_update_opts data_opts)
-{
- struct bch_fs *c = trans->c;
- struct bkey_i *n;
- int ret;
-
- n = bch2_bkey_make_mut_noupdate(trans, k);
- ret = PTR_ERR_OR_ZERO(n);
- if (ret)
- return ret;
-
- while (data_opts.kill_ptrs) {
- unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
- struct bch_extent_ptr *ptr;
-
- bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
- data_opts.kill_ptrs ^= 1U << drop;
- }
-
- /*
- * If the new extent no longer has any pointers, bch2_extent_normalize()
- * will do the appropriate thing with it (turning it into a
- * KEY_TYPE_error key, or just a discard if it was a cached extent)
- */
- bch2_extent_normalize(c, bkey_i_to_s(n));
-
- /*
- * Since we're not inserting through an extent iterator
- * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
- * we aren't using the extent overwrite path to delete, we're
- * just using the normal key deletion path:
- */
- if (bkey_deleted(&n->k))
- n->k.size = 0;
-
- return bch2_trans_relock(trans) ?:
- bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
-}
-
int bch2_move_extent(struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct btree_iter *iter,
@@ -347,19 +305,11 @@ int bch2_move_extent(struct moving_context *ctxt,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
- ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, iter->btree_id, k);
- if (ret && ret != -BCH_ERR_unwritten_extent_update)
+ if (ret)
goto err_free_pages;
- if (ret == -BCH_ERR_unwritten_extent_update) {
- bch2_update_unwritten_extent(trans, &io->write);
- move_free(io);
- return 0;
- }
-
- BUG_ON(ret);
-
io->write.op.end_io = move_write_done;
if (ctxt->rate)
@@ -403,6 +353,9 @@ err_free_pages:
err_free:
kfree(io);
err:
+ if (ret == -BCH_ERR_data_update_done)
+ return 0;
+
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
trace_move_extent_alloc_mem_fail2(c, k);
return ret;
@@ -506,22 +459,13 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
do {
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
-
- if (delay) {
- if (delay > HZ / 10)
- bch2_trans_unlock_long(ctxt->trans);
- else
- bch2_trans_unlock(ctxt->trans);
- set_current_state(TASK_INTERRUPTIBLE);
- }
-
- if (kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
+ if (kthread_should_stop())
return 1;
- }
if (delay)
- schedule_timeout(delay);
+ move_ctxt_wait_event_timeout(ctxt,
+ freezing(current) || kthread_should_stop(),
+ delay);
if (unlikely(freezing(current))) {
bch2_moving_ctxt_flush_all(ctxt);
@@ -729,7 +673,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
}
a = bch2_alloc_to_v4(k, &a_convert);
- dirty_sectors = a->dirty_sectors;
+ dirty_sectors = bch2_bucket_sectors_dirty(*a);
bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
fragmentation = a->fragmentation_lru;
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index cedde6ee..53196567 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -38,6 +38,25 @@ struct moving_context {
wait_queue_head_t wait;
};
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \
+({ \
+ int _ret = 0; \
+ while (true) { \
+ bool cond_finished = false; \
+ bch2_moving_ctxt_do_pending_writes(_ctxt); \
+ \
+ if (_cond) \
+ break; \
+ bch2_trans_unlock_long((_ctxt)->trans); \
+ _ret = __wait_event_timeout((_ctxt)->wait, \
+ bch2_moving_ctxt_next_pending_write(_ctxt) || \
+ (cond_finished = (_cond)), _timeout); \
+ if (_ret || ( cond_finished)) \
+ break; \
+ } \
+ _ret; \
+})
+
#define move_ctxt_wait_event(_ctxt, _cond) \
do { \
bool cond_finished = false; \
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index e884324b..d079ba7a 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &_a);
b->k.gen = a->gen;
- b->sectors = a->dirty_sectors;
+ b->sectors = bch2_bucket_sectors_dirty(*a);
ret = data_type_movable(a->data_type) &&
a->fragmentation_lru &&
@@ -149,6 +149,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
struct bkey_s_c k;
size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
+ struct bpos last_flushed_pos = POS_MIN;
int ret;
move_buckets_wait(ctxt, buckets_in_flight, false);
@@ -165,11 +166,16 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
- struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
- int ret2 = 0;
+ int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
+ if (ret2) {
+ ret2 = ret2 < 0 ? ret2 : 0;
+ goto next;
+ }
saw++;
+ struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
+
if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
not_movable++;
else if (bucket_in_flight(buckets_in_flight, b.k))
@@ -179,6 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
if (ret2 >= 0)
sectors += b.sectors;
}
+next:
ret2;
}));
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index db2139c0..fefa4395 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -171,6 +171,20 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
return bkey_s_c_null;
}
+ if (trace_rebalance_extent_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "target=");
+ bch2_target_to_text(&buf, c, r->target);
+ prt_str(&buf, " compression=");
+ prt_str(&buf, bch2_compression_opts[r->compression]);
+ prt_str(&buf, " ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ trace_rebalance_extent(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+
return k;
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index a94f2b5e..5f4f76e6 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -302,8 +302,6 @@ static int journal_replay_entry_early(struct bch_fs *c,
struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
- ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
-
for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 820f9989..ccb776e0 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+ struct bch_sb *sb,
+ struct printbuf *err)
+{
+ if (!r->nr_devs) {
+ prt_printf(err, "no devices in entry ");
+ goto bad;
+ }
+
+ if (r->nr_required > 1 &&
+ r->nr_required >= r->nr_devs) {
+ prt_printf(err, "bad nr_required in entry ");
+ goto bad;
+ }
+
+ for (unsigned i = 0; i < r->nr_devs; i++)
+ if (!bch2_dev_exists(sb, r->devs[i])) {
+ prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+ goto bad;
+ }
+
+ return 0;
+bad:
+ bch2_replicas_entry_to_text(err, r);
+ return -BCH_ERR_invalid_replicas_entry;
+}
+
void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
}
static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+ struct bch_replicas_cpu *old,
struct bch_replicas_entry_v1 *new_entry)
{
unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
replicas_entry_bytes(new_entry)),
};
+ for (i = 0; i < new_entry->nr_devs; i++)
+ BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
BUG_ON(!new_entry->data_type);
verify_replicas_entry(new_entry);
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
if (c->replicas_gc.entries &&
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
- new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+ new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
}
if (!__replicas_has_entry(&c->replicas, new_entry)) {
- new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+ new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
if (idx < 0) {
struct bch_replicas_cpu n;
- n = cpu_replicas_add_entry(&c->replicas, r);
+ n = cpu_replicas_add_entry(c, &c->replicas, r);
if (!n.entries)
return -BCH_ERR_ENOMEM_cpu_replicas;
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_sb *sb,
struct printbuf *err)
{
- unsigned i, j;
+ unsigned i;
sort_cmp_size(cpu_r->entries,
cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_replicas_entry_v1 *e =
cpu_replicas_entry(cpu_r, i);
- if (e->data_type >= BCH_DATA_NR) {
- prt_printf(err, "invalid data type in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- if (!e->nr_devs) {
- prt_printf(err, "no devices in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- if (e->nr_required > 1 &&
- e->nr_required >= e->nr_devs) {
- prt_printf(err, "bad nr_required in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- for (j = 0; j < e->nr_devs; j++)
- if (!bch2_dev_exists(sb, e->devs[j])) {
- prt_printf(err, "invalid device %u in entry ", e->devs[j]);
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
+ int ret = bch2_replicas_entry_validate(e, sb, err);
+ if (ret)
+ return ret;
if (i + 1 < cpu_r->nr) {
struct bch_replicas_entry_v1 *n =
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index b2bb12a9..654a4b26 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -9,6 +9,8 @@
void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry_v1 *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
+ struct bch_sb *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
static inline struct bch_replicas_entry_v1 *
diff --git a/libbcachefs/sb-clean.c b/libbcachefs/sb-clean.c
index fedc9e10..8dc0e3db 100644
--- a/libbcachefs/sb-clean.c
+++ b/libbcachefs/sb-clean.c
@@ -256,7 +256,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
u->entry.type = BCH_JSET_ENTRY_dev_usage;
u->dev = cpu_to_le32(dev);
- u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec);
for (i = 0; i < BCH_DATA_NR; i++) {
u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index bed0f857..259af07f 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -259,6 +259,11 @@ static void member_to_text(struct printbuf *out,
prt_printf(out, "(none)");
prt_newline(out);
+ prt_str(out, "Durability:");
+ prt_tab(out);
+ prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
+ prt_newline(out);
+
prt_printf(out, "Discard:");
prt_tab(out);
prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index fa853478..512d5665 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -658,7 +658,7 @@ reread:
return 0;
}
-int __bch2_read_super(const char *path, struct bch_opts *opts,
+static int __bch2_read_super(const char *path, struct bch_opts *opts,
struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
{
u64 offset = opt_get(*opts, sb);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 91f75717..552d55dd 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -270,6 +270,8 @@ void bch2_fs_read_only(struct bch_fs *c)
BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+ bch_verbose(c, "going read-only");
+
/*
* Block new foreground-end write operations from starting - any new
* writes will return -EROFS:
@@ -297,13 +299,21 @@ void bch2_fs_read_only(struct bch_fs *c)
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
+ bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+ if (writes_disabled)
+ bch_verbose(c, "finished waiting for writes to stop");
+
__bch2_fs_read_only(c);
wait_event(bch2_read_only_wait,
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+ if (!writes_disabled)
+ bch_verbose(c, "finished waiting for writes to stop");
+
clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
clear_bit(BCH_FS_GOING_RO, &c->flags);
+ clear_bit(BCH_FS_RW, &c->flags);
if (!bch2_journal_error(&c->journal) &&
!test_bit(BCH_FS_ERROR, &c->flags) &&
@@ -319,9 +329,9 @@ void bch2_fs_read_only(struct bch_fs *c)
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
+ } else {
+ bch_verbose(c, "done going read-only, filesystem not clean");
}
-
- clear_bit(BCH_FS_RW, &c->flags);
}
static void bch2_fs_read_only_work(struct work_struct *work)
@@ -424,6 +434,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ set_bit(BCH_FS_RW, &c->flags);
+ set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_reinit(&c->writes);
+#else
+ for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+ BUG_ON(atomic_long_read(&c->writes[i]));
+ atomic_long_inc(&c->writes[i]);
+ }
+#endif
+
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
@@ -440,24 +462,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
goto err;
}
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_reinit(&c->writes);
-#else
- for (i = 0; i < BCH_WRITE_REF_NR; i++) {
- BUG_ON(atomic_long_read(&c->writes[i]));
- atomic_long_inc(&c->writes[i]);
- }
-#endif
- set_bit(BCH_FS_RW, &c->flags);
- set_bit(BCH_FS_WAS_RW, &c->flags);
-
bch2_do_discards(c);
bch2_do_invalidates(c);
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
return 0;
err:
- __bch2_fs_read_only(c);
+ if (test_bit(BCH_FS_RW, &c->flags))
+ bch2_fs_read_only(c);
+ else
+ __bch2_fs_read_only(c);
return ret;
}
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 264c46b4..7223418d 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -258,15 +258,16 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
struct btree_iter iter;
struct bkey_s_c k;
enum btree_id id;
- u64 nr_uncompressed_extents = 0,
- nr_compressed_extents = 0,
- nr_incompressible_extents = 0,
- uncompressed_sectors = 0,
- incompressible_sectors = 0,
- compressed_sectors_compressed = 0,
- compressed_sectors_uncompressed = 0;
+ struct compression_type_stats {
+ u64 nr_extents;
+ u64 sectors_compressed;
+ u64 sectors_uncompressed;
+ } s[BCH_COMPRESSION_TYPE_NR];
+ u64 compressed_incompressible = 0;
int ret = 0;
+ memset(s, 0, sizeof(s));
+
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
@@ -279,36 +280,30 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
for_each_btree_key(trans, iter, id, POS_MIN,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- bool compressed = false, uncompressed = false, incompressible = false;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- switch (p.crc.compression_type) {
- case BCH_COMPRESSION_TYPE_none:
- uncompressed = true;
- uncompressed_sectors += k.k->size;
- break;
- case BCH_COMPRESSION_TYPE_incompressible:
- incompressible = true;
- incompressible_sectors += k.k->size;
- break;
- default:
- compressed_sectors_compressed +=
- p.crc.compressed_size;
- compressed_sectors_uncompressed +=
- p.crc.uncompressed_size;
- compressed = true;
- break;
+ bool compressed = false, incompressible = false;
+
+ bkey_for_each_crc(k.k, ptrs, crc, entry) {
+ incompressible |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+ compressed |= crc_is_compressed(crc);
+
+ if (crc_is_compressed(crc)) {
+ s[crc.compression_type].nr_extents++;
+ s[crc.compression_type].sectors_compressed += crc.compressed_size;
+ s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
}
}
- if (incompressible)
- nr_incompressible_extents++;
- else if (uncompressed)
- nr_uncompressed_extents++;
- else if (compressed)
- nr_compressed_extents++;
+ compressed_incompressible += compressed && incompressible;
+
+ if (!compressed) {
+ unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;
+
+ s[t].nr_extents++;
+ s[t].sectors_compressed += k.k->size;
+ s[t].sectors_uncompressed += k.k->size;
+ }
}
bch2_trans_iter_exit(trans, &iter);
}
@@ -318,26 +313,45 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
if (ret)
return ret;
- prt_printf(out, "uncompressed:\n");
- prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents);
- prt_printf(out, " size: ");
- prt_human_readable_u64(out, uncompressed_sectors << 9);
- prt_printf(out, "\n");
+ prt_str(out, "type");
+ printbuf_tabstop_push(out, 12);
+ prt_tab(out);
- prt_printf(out, "compressed:\n");
- prt_printf(out, " nr extents: %llu\n", nr_compressed_extents);
- prt_printf(out, " compressed size: ");
- prt_human_readable_u64(out, compressed_sectors_compressed << 9);
- prt_printf(out, "\n");
- prt_printf(out, " uncompressed size: ");
- prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
- prt_printf(out, "\n");
+ prt_str(out, "compressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "uncompressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "average extent size");
+ printbuf_tabstop_push(out, 24);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
+ prt_str(out, bch2_compression_types[i]);
+ prt_tab(out);
+
+ prt_human_readable_u64(out, s[i].sectors_compressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].nr_extents
+ ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
+ : 0);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ if (compressed_incompressible) {
+ prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
+ prt_newline(out);
+ }
- prt_printf(out, "incompressible:\n");
- prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents);
- prt_printf(out, " size: ");
- prt_human_readable_u64(out, incompressible_sectors << 9);
- prt_printf(out, "\n");
return 0;
}
@@ -786,32 +800,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
- prt_tab(out);
- prt_str(out, "buckets");
- prt_tab_rjust(out);
- prt_str(out, "sectors");
- prt_tab_rjust(out);
- prt_str(out, "fragmented");
- prt_tab_rjust(out);
- prt_newline(out);
-
- for (i = 0; i < BCH_DATA_NR; i++) {
- prt_str(out, bch2_data_types[i]);
- prt_tab(out);
- prt_u64(out, stats.d[i].buckets);
- prt_tab_rjust(out);
- prt_u64(out, stats.d[i].sectors);
- prt_tab_rjust(out);
- prt_u64(out, stats.d[i].fragmented);
- prt_tab_rjust(out);
- prt_newline(out);
- }
-
- prt_str(out, "ec");
- prt_tab(out);
- prt_u64(out, stats.buckets_ec);
- prt_tab_rjust(out);
- prt_newline(out);
+ bch2_dev_usage_to_text(out, &stats);
prt_newline(out);
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 7b24e7fe..4980cfdd 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -32,19 +32,21 @@ DECLARE_EVENT_CLASS(bpos,
TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
);
-DECLARE_EVENT_CLASS(bkey,
- TP_PROTO(struct bch_fs *c, const char *k),
- TP_ARGS(c, k),
+DECLARE_EVENT_CLASS(str,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str),
TP_STRUCT__entry(
- __string(k, k )
+ __field(dev_t, dev )
+ __string(str, str )
),
TP_fast_assign(
- __assign_str(k, k);
+ __entry->dev = c->dev;
+ __assign_str(str, str);
),
- TP_printk("%s", __get_str(k))
+ TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
);
DECLARE_EVENT_CLASS(btree_node,
@@ -736,22 +738,22 @@ TRACE_EVENT(bucket_evacuate,
__entry->dev_idx, __entry->bucket)
);
-DEFINE_EVENT(bkey, move_extent,
+DEFINE_EVENT(str, move_extent,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
-DEFINE_EVENT(bkey, move_extent_read,
+DEFINE_EVENT(str, move_extent_read,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
-DEFINE_EVENT(bkey, move_extent_write,
+DEFINE_EVENT(str, move_extent_write,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
-DEFINE_EVENT(bkey, move_extent_finish,
+DEFINE_EVENT(str, move_extent_finish,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
@@ -773,7 +775,7 @@ TRACE_EVENT(move_extent_fail,
TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
);
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
+DEFINE_EVENT(str, move_extent_alloc_mem_fail,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
@@ -1366,6 +1368,16 @@ TRACE_EVENT(write_buffer_flush_slowpath,
TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
);
+DEFINE_EVENT(str, rebalance_extent,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(str, data_update,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
#endif /* _TRACE_BCACHEFS_H */
/* This part must be outside protection */