aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-07-10 20:31:34 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-07-15 17:36:15 -0400
commitc8bec83e307f28751c433ba1d3f648429fb5a34c (patch)
tree6e70e0cf8f25117f706214d86a0689ee8495dca0
parent1c156d5c4667c1c2e2949b229dfef75696196d35 (diff)
downloadbcachefs-tools-c8bec83e307f28751c433ba1d3f648429fb5a34c.tar.gz
Update bcachefs sources to e14d7c7195 bcachefs: Compression levels
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--cmd_dump.c2
-rw-r--r--cmd_fsck.c8
-rw-r--r--cmd_list_journal.c2
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/lz4.h5
-rw-r--r--include/linux/rcupdate.h1
-rw-r--r--include/linux/slab.h3
-rw-r--r--libbcachefs.c3
-rw-r--r--libbcachefs/bcachefs.h5
-rw-r--r--libbcachefs/bcachefs_format.h51
-rw-r--r--libbcachefs/btree_gc.c2
-rw-r--r--libbcachefs/btree_update_leaf.c4
-rw-r--r--libbcachefs/btree_write_buffer.c3
-rw-r--r--libbcachefs/checksum.h6
-rw-r--r--libbcachefs/compress.c157
-rw-r--r--libbcachefs/compress.h37
-rw-r--r--libbcachefs/data_update.c4
-rw-r--r--libbcachefs/disk_groups.c21
-rw-r--r--libbcachefs/disk_groups.h7
-rw-r--r--libbcachefs/error.c12
-rw-r--r--libbcachefs/error.h7
-rw-r--r--libbcachefs/fs-io.c85
-rw-r--r--libbcachefs/fsck.c4
-rw-r--r--libbcachefs/io.c14
-rw-r--r--libbcachefs/io.h2
-rw-r--r--libbcachefs/io_types.h2
-rw-r--r--libbcachefs/opts.c115
-rw-r--r--libbcachefs/opts.h32
-rw-r--r--libbcachefs/rebalance.c3
-rw-r--r--libbcachefs/recovery.c11
-rw-r--r--libbcachefs/subvolume.c130
-rw-r--r--libbcachefs/subvolume.h86
-rw-r--r--libbcachefs/subvolume_types.h4
-rw-r--r--libbcachefs/super-io.c17
-rw-r--r--libbcachefs/super.c19
-rw-r--r--libbcachefs/util.h2
-rw-r--r--linux/blkdev.c11
-rw-r--r--rust-src/src/cmd_list.rs2
39 files changed, 645 insertions, 237 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 9071b137..06166c86 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-04f2d2ae5bd69eecd0b4bb700efdc665b09745a1
+e14d7c7195b974bbaf400f9c3f2bdaa94fc8d372
diff --git a/cmd_dump.c b/cmd_dump.c
index 59d73c3c..cc25a6a3 100644
--- a/cmd_dump.c
+++ b/cmd_dump.c
@@ -117,7 +117,7 @@ int cmd_dump(int argc, char *argv[])
opt_set(opts, norecovery, true);
opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue);
- opt_set(opts, fix_errors, FSCK_OPT_NO);
+ opt_set(opts, fix_errors, FSCK_FIX_no);
while ((opt = getopt(argc, argv, "o:fjvh")) != -1)
switch (opt) {
diff --git a/cmd_fsck.c b/cmd_fsck.c
index 54ace957..00134971 100644
--- a/cmd_fsck.c
+++ b/cmd_fsck.c
@@ -37,7 +37,7 @@ int cmd_fsck(int argc, char *argv[])
opt_set(opts, degraded, true);
opt_set(opts, fsck, true);
- opt_set(opts, fix_errors, FSCK_OPT_ASK);
+ opt_set(opts, fix_errors, FSCK_FIX_ask);
while ((opt = getopt_long(argc, argv,
"apynfo:rvh",
@@ -45,14 +45,14 @@ int cmd_fsck(int argc, char *argv[])
switch (opt) {
case 'a': /* outdated alias for -p */
case 'p':
- opt_set(opts, fix_errors, FSCK_OPT_YES);
+ opt_set(opts, fix_errors, FSCK_FIX_yes);
break;
case 'y':
- opt_set(opts, fix_errors, FSCK_OPT_YES);
+		opt_set(opts, fix_errors, FSCK_FIX_yes);
break;
case 'n':
opt_set(opts, nochanges, true);
- opt_set(opts, fix_errors, FSCK_OPT_NO);
+ opt_set(opts, fix_errors, FSCK_FIX_no);
break;
case 'f':
/* force check, even if filesystem marked clean: */
diff --git a/cmd_list_journal.c b/cmd_list_journal.c
index 17124225..ce4caad4 100644
--- a/cmd_list_journal.c
+++ b/cmd_list_journal.c
@@ -234,7 +234,7 @@ int cmd_list_journal(int argc, char *argv[])
opt_set(opts, norecovery, true);
opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue);
- opt_set(opts, fix_errors, FSCK_OPT_YES);
+ opt_set(opts, fix_errors, FSCK_FIX_yes);
opt_set(opts, keep_journal, true);
opt_set(opts, read_journal_only,true);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9e020775..7d378ab2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -44,6 +44,7 @@ struct user_namespace;
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400)
+#define FMODE_BUFFERED ((__force fmode_t)0x800)
struct inode {
unsigned long i_ino;
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 86e1dde3..f574964a 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -2,4 +2,9 @@
#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \
LZ4_compress_destSize(src, dst, srclen, dstlen)
+
+#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1
+
#define LZ4_MEM_COMPRESS 0
+#define LZ4HC_MEM_COMPRESS 0
+#define LZ4HC_MIN_CLEVEL 0
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3db40cb4..ef032531 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -12,6 +12,7 @@
#define rcu_access_pointer(p) READ_ONCE(p)
#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
+#define kvfree_rcu(ptr) kfree(ptr) /* XXX */
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a36f6f43..78f906a8 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -213,7 +213,7 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size)
#define vfree(p) free(p)
-static inline void *__vmalloc(unsigned long size, gfp_t flags)
+static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags)
{
unsigned i;
void *p;
@@ -234,6 +234,7 @@ static inline void *__vmalloc(unsigned long size, gfp_t flags)
return p;
}
+#define __vmalloc __vmalloc_noprof
static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
{
diff --git a/libbcachefs.c b/libbcachefs.c
index 092a54a6..bac772b2 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -604,8 +604,7 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
u64 v;
for (i = 0; i < bch2_opts_nr; i++) {
- if (!strs.by_id[i] ||
- bch2_opt_table[i].type == BCH_OPT_FN)
+ if (!strs.by_id[i])
continue;
ret = bch2_opt_parse(NULL,
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index d8c02064..445d010c 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -774,9 +774,10 @@ struct bch_fs {
struct mutex sb_lock;
/* snapshot.c: */
- GENRADIX(struct snapshot_t) snapshots;
- struct bch_snapshot_table __rcu *snapshot_table;
+ struct snapshot_table __rcu *snapshots;
+ size_t snapshot_table_size;
struct mutex snapshot_table_lock;
+
struct work_struct snapshot_delete_work;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 6d693e4d..5c308f84 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -695,7 +695,7 @@ struct bch_reservation {
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
((sizeof(struct bch_extent_crc128) + \
- sizeof(struct bch_extent_ptr)) / sizeof(u64))
+ sizeof(struct bch_extent_ptr)) / sizeof(__u64))
/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
@@ -707,7 +707,7 @@ struct bch_reservation {
/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX \
((sizeof(struct bch_btree_ptr_v2) + \
- sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX \
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
@@ -749,7 +749,7 @@ struct bch_inode_v3 {
} __packed __aligned(8);
#define INODEv3_FIELDS_START_INITIAL 6
-#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
struct bch_inode_generation {
struct bch_val v;
@@ -916,7 +916,7 @@ struct bch_dirent {
#define DT_SUBVOL 16
#define BCH_DT_MAX 17
-#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(u64) - \
+#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(__u64) - \
sizeof(struct bkey) - \
offsetof(struct bch_dirent, d_name)))
@@ -1009,7 +1009,7 @@ struct bch_alloc_v4 {
} __packed __aligned(8);
#define BCH_ALLOC_V4_U64s_V0 6
-#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64))
+#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
@@ -1289,10 +1289,10 @@ struct bch_key {
};
#define BCH_KEY_MAGIC \
- (((u64) 'b' << 0)|((u64) 'c' << 8)| \
- ((u64) 'h' << 16)|((u64) '*' << 24)| \
- ((u64) '*' << 32)|((u64) 'k' << 40)| \
- ((u64) 'e' << 48)|((u64) 'y' << 56))
+ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \
+ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \
+ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \
+ ((__u64) 'e' << 48)|((__u64) 'y' << 56))
struct bch_encrypted_key {
__le64 magic;
@@ -1747,7 +1747,7 @@ LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
-LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
@@ -1767,7 +1767,7 @@ LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
-LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
struct bch_sb, flags[2], 0, 4);
LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
@@ -1783,11 +1783,36 @@ LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56);
-/* flags[4] 56-64 unused: */
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
+ struct bch_sb, flags[4], 60, 64);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
+static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
+{
+ return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{
+ SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
+ SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
+
+static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
+{
+ return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) |
+ (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{
+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
+
/*
* Features:
*
@@ -2272,7 +2297,7 @@ static inline __u64 BTREE_NODE_ID(struct btree_node *n)
return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
}
-static inline void SET_BTREE_NODE_ID(struct btree_node *n, u64 v)
+static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
{
SET_BTREE_NODE_ID_LO(n, v);
SET_BTREE_NODE_ID_HI(n, v >> 4);
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index c47d5d8c..be537b23 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -1811,7 +1811,7 @@ again:
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
(BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations &&
- c->opts.fix_errors != FSCK_OPT_NO)) {
+ c->opts.fix_errors != FSCK_FIX_no)) {
bch_info(c, "Starting topology repair pass");
ret = bch2_repair_topology(c);
if (ret)
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 53219fdc..3638cef2 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -311,7 +311,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
- bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot));
+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}
static noinline int
@@ -1229,7 +1229,7 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
struct bpos pos)
{
if (!btree_type_has_snapshots(id) ||
- !snapshot_t(trans->c, pos.snapshot)->children[0])
+ bch2_snapshot_is_leaf(trans->c, pos.snapshot))
return 0;
return __check_pos_snapshot_overwritten(trans, id, pos);
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index b5022631..6c30a72e 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -129,6 +129,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
keys = wb->keys[s.idx];
nr = s.nr;
+ if (race_fault())
+ goto slowpath;
+
/*
* We first sort so that we can detect and skip redundant updates, and
* then we attempt to flush in sorted btree order, as this is most
diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h
index 409ad534..1ad1d5f0 100644
--- a/libbcachefs/checksum.h
+++ b/libbcachefs/checksum.h
@@ -120,12 +120,6 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
}
-static const unsigned bch2_compression_opt_to_type[] = {
-#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
- BCH_COMPRESSION_OPTS()
-#undef x
-};
-
static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
unsigned type)
{
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index 48427a27..560214c1 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -296,21 +296,32 @@ static int attempt_compress(struct bch_fs *c,
void *workspace,
void *dst, size_t dst_len,
void *src, size_t src_len,
- enum bch_compression_type compression_type)
+ struct bch_compression_opt compression)
{
- switch (compression_type) {
- case BCH_COMPRESSION_TYPE_lz4: {
- int len = src_len;
- int ret = LZ4_compress_destSize(
- src, dst,
- &len, dst_len,
- workspace);
-
- if (len < src_len)
- return -len;
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
- return ret;
- }
+ switch (compression_type) {
+ case BCH_COMPRESSION_TYPE_lz4:
+ if (compression.level < LZ4HC_MIN_CLEVEL) {
+ int len = src_len;
+ int ret = LZ4_compress_destSize(
+ src, dst,
+ &len, dst_len,
+ workspace);
+ if (len < src_len)
+ return -len;
+
+ return ret;
+ } else {
+ int ret = LZ4_compress_HC(
+ src, dst,
+ src_len, dst_len,
+ compression.level,
+ workspace);
+
+ return ret ?: -1;
+ }
case BCH_COMPRESSION_TYPE_gzip: {
z_stream strm = {
.next_in = src,
@@ -320,7 +331,11 @@ static int attempt_compress(struct bch_fs *c,
};
zlib_set_workspace(&strm, workspace);
- zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ zlib_deflateInit2(&strm,
+ compression.level
+ ? clamp_t(unsigned, compression.level,
+ Z_BEST_SPEED, Z_BEST_COMPRESSION)
+ : Z_DEFAULT_COMPRESSION,
Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY);
@@ -333,8 +348,14 @@ static int attempt_compress(struct bch_fs *c,
return strm.total_out;
}
case BCH_COMPRESSION_TYPE_zstd: {
+ /*
+ * rescale:
+ * zstd max compression level is 22, our max level is 15
+ */
+ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
+ ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
- zstd_cctx_workspace_bound(&c->zstd_params.cParams));
+ zstd_cctx_workspace_bound(&params.cParams));
/*
* ZSTD requires that when we decompress we pass in the exact
@@ -365,10 +386,12 @@ static int attempt_compress(struct bch_fs *c,
static unsigned __bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len,
- enum bch_compression_type compression_type)
+ struct bch_compression_opt compression)
{
struct bbuf src_data = { NULL }, dst_data = { NULL };
void *workspace;
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
unsigned pad;
int ret = 0;
@@ -400,7 +423,7 @@ static unsigned __bio_compress(struct bch_fs *c,
ret = attempt_compress(c, workspace,
dst_data.b, *dst_len,
src_data.b, *src_len,
- compression_type);
+ compression);
if (ret > 0) {
*dst_len = ret;
ret = 0;
@@ -447,22 +470,24 @@ static unsigned __bio_compress(struct bch_fs *c,
BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
BUG_ON(*dst_len & (block_bytes(c) - 1));
BUG_ON(*src_len & (block_bytes(c) - 1));
+ ret = compression_type;
out:
bio_unmap_or_unbounce(c, src_data);
bio_unmap_or_unbounce(c, dst_data);
- return compression_type;
+ return ret;
err:
- compression_type = BCH_COMPRESSION_TYPE_incompressible;
+ ret = BCH_COMPRESSION_TYPE_incompressible;
goto out;
}
unsigned bch2_bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len,
- unsigned compression_type)
+ unsigned compression_opt)
{
unsigned orig_dst = dst->bi_iter.bi_size;
unsigned orig_src = src->bi_iter.bi_size;
+ unsigned compression_type;
/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
@@ -470,11 +495,9 @@ unsigned bch2_bio_compress(struct bch_fs *c,
/* Don't generate a bigger output than input: */
dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
- if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
- compression_type = BCH_COMPRESSION_TYPE_lz4;
-
compression_type =
- __bio_compress(c, dst, dst_len, src, src_len, compression_type);
+ __bio_compress(c, dst, dst_len, src, src_len,
+ bch2_compression_decode(compression_opt));
dst->bi_iter.bi_size = orig_dst;
src->bi_iter.bi_size = orig_src;
@@ -521,8 +544,10 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
}
int bch2_check_set_has_compressed_data(struct bch_fs *c,
- unsigned compression_type)
+ unsigned compression_opt)
{
+ unsigned compression_type = bch2_compression_decode(compression_opt).type;
+
BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
return compression_type
@@ -546,14 +571,16 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
- ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
+ ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
+ c->opts.encoded_extent_max);
struct {
- unsigned feature;
- unsigned type;
- size_t compress_workspace;
- size_t decompress_workspace;
+ unsigned feature;
+ enum bch_compression_type type;
+ size_t compress_workspace;
+ size_t decompress_workspace;
} compression_types[] = {
- { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
@@ -612,16 +639,74 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
return 0;
}
+static u64 compression_opt_to_feature(unsigned v)
+{
+ unsigned type = bch2_compression_decode(v).type;
+ return 1ULL << bch2_compression_opt_to_feature[type];
+}
+
int bch2_fs_compress_init(struct bch_fs *c)
{
u64 f = c->sb.features;
- if (c->opts.compression)
- f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
-
- if (c->opts.background_compression)
- f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
+ f |= compression_opt_to_feature(c->opts.compression);
+ f |= compression_opt_to_feature(c->opts.background_compression);
return __bch2_fs_compress_init(c, f);
+}
+
+int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
+ struct printbuf *err)
+{
+ char *val = kstrdup(_val, GFP_KERNEL);
+ char *p = val, *type_str, *level_str;
+ struct bch_compression_opt opt = { 0 };
+ int ret;
+
+ if (!val)
+ return -ENOMEM;
+
+ type_str = strsep(&p, ":");
+ level_str = p;
+
+ ret = match_string(bch2_compression_opts, -1, type_str);
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression type");
+ if (ret < 0)
+ goto err;
+
+ opt.type = ret;
+
+ if (level_str) {
+ unsigned level;
+
+ ret = kstrtouint(level_str, 10, &level);
+ if (!ret && !opt.type && level)
+ ret = -EINVAL;
+ if (!ret && level > 15)
+ ret = -EINVAL;
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression level");
+ if (ret < 0)
+ goto err;
+
+ opt.level = level;
+ }
+
+ *res = bch2_compression_encode(opt);
+err:
+ kfree(val);
+ return ret;
+}
+
+void bch2_opt_compression_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ struct bch_compression_opt opt = bch2_compression_decode(v);
+ prt_str(out, bch2_compression_opts[opt.type]);
+ if (opt.level)
+ prt_printf(out, ":%u", opt.level);
}
diff --git a/libbcachefs/compress.h b/libbcachefs/compress.h
index 4bab1f61..052ea303 100644
--- a/libbcachefs/compress.h
+++ b/libbcachefs/compress.h
@@ -4,6 +4,35 @@
#include "extents_types.h"
+struct bch_compression_opt {
+ u8 type:4,
+ level:4;
+};
+
+static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
+{
+ return (struct bch_compression_opt) {
+ .type = v & 15,
+ .level = v >> 4,
+ };
+}
+
+static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
+{
+ return opt.type|(opt.level << 4);
+}
+
+static const unsigned __bch2_compression_opt_to_type[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
+ BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
+{
+ return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
+}
+
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
@@ -15,4 +44,12 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
void bch2_fs_compress_exit(struct bch_fs *);
int bch2_fs_compress_init(struct bch_fs *);
+int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
+void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+
+#define bch2_opt_compression (struct bch_opt_fn) { \
+ .parse = bch2_opt_compression_parse, \
+ .to_text = bch2_opt_compression_to_text, \
+}
+
#endif /* _BCACHEFS_COMPRESS_H */
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 3c918368..cfc62446 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -455,9 +455,7 @@ int bch2_data_update_init(struct btree_trans *trans,
BCH_WRITE_DATA_ENCODED|
BCH_WRITE_MOVE|
m->data_opts.write_flags;
- m->op.compression_type =
- bch2_compression_opt_to_type[io_opts.background_compression ?:
- io_opts.compression];
+ m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
bkey_for_each_ptr(ptrs, ptr)
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
index 52b64007..de14ca3a 100644
--- a/libbcachefs/disk_groups.c
+++ b/libbcachefs/disk_groups.c
@@ -460,30 +460,37 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
return ret;
}
-int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
+int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
+ struct printbuf *err)
{
struct bch_dev *ca;
int g;
- if (!strlen(buf) || !strcmp(buf, "none")) {
- *v = 0;
+ if (!val)
+ return -EINVAL;
+
+ if (!c)
+ return 0;
+
+ if (!strlen(val) || !strcmp(val, "none")) {
+ *res = 0;
return 0;
}
/* Is it a device? */
- ca = bch2_dev_lookup(c, buf);
+ ca = bch2_dev_lookup(c, val);
if (!IS_ERR(ca)) {
- *v = dev_to_target(ca->dev_idx);
+ *res = dev_to_target(ca->dev_idx);
percpu_ref_put(&ca->ref);
return 0;
}
mutex_lock(&c->sb_lock);
- g = bch2_disk_path_find(&c->disk_sb, buf);
+ g = bch2_disk_path_find(&c->disk_sb, val);
mutex_unlock(&c->sb_lock);
if (g >= 0) {
- *v = group_to_target(g);
+ *res = group_to_target(g);
return 0;
}
diff --git a/libbcachefs/disk_groups.h b/libbcachefs/disk_groups.h
index ec12584c..bd771176 100644
--- a/libbcachefs/disk_groups.h
+++ b/libbcachefs/disk_groups.h
@@ -85,9 +85,14 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned);
-int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+#define bch2_opt_target (struct bch_opt_fn) { \
+ .parse = bch2_opt_target_parse, \
+ .to_text = bch2_opt_target_to_text, \
+}
+
int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index b08cd23d..685464b8 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -204,7 +204,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
prt_str(out, ", continuing");
ret = -BCH_ERR_fsck_ignore;
}
- } else if (c->opts.fix_errors == FSCK_OPT_EXIT) {
+ } else if (c->opts.fix_errors == FSCK_FIX_exit) {
prt_str(out, ", exiting");
ret = -BCH_ERR_fsck_errors_not_fixed;
} else if (flags & FSCK_CAN_FIX) {
@@ -212,7 +212,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
? s->fix
: c->opts.fix_errors;
- if (fix == FSCK_OPT_ASK) {
+ if (fix == FSCK_FIX_ask) {
int ask;
prt_str(out, ": fix?");
@@ -223,13 +223,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
if (ask >= YN_ALLNO && s)
s->fix = ask == YN_ALLNO
- ? FSCK_OPT_NO
- : FSCK_OPT_YES;
+ ? FSCK_FIX_no
+ : FSCK_FIX_yes;
ret = ask & 1
? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore;
- } else if (fix == FSCK_OPT_YES ||
+ } else if (fix == FSCK_FIX_yes ||
(c->opts.nochanges &&
!(flags & FSCK_CAN_IGNORE))) {
prt_str(out, ", fixing");
@@ -244,7 +244,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
}
if (ret == -BCH_ERR_fsck_ignore &&
- (c->opts.fix_errors == FSCK_OPT_EXIT ||
+ (c->opts.fix_errors == FSCK_FIX_exit ||
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index edf12443..7ce95400 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -91,13 +91,6 @@ do { \
* be able to repair:
*/
-enum fsck_err_opts {
- FSCK_OPT_EXIT,
- FSCK_OPT_YES,
- FSCK_OPT_NO,
- FSCK_OPT_ASK,
-};
-
struct fsck_err_state {
struct list_head list;
const char *fmt;
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index a8060052..6b691b2b 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -35,6 +35,8 @@
#include <trace/events/writeback.h>
+static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
+
struct folio_vec {
struct folio *fv_folio;
size_t fv_offset;
@@ -1972,7 +1974,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_for_each(folios, fi) {
struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos;
- unsigned f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter);
+ unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) {
folios_trunc(&folios, fi);
@@ -3373,6 +3375,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
struct quota_res quota_res = { 0 };
struct bkey_s_c k;
unsigned sectors;
+ bool is_allocation;
+ u64 hole_start, hole_end;
u32 snapshot;
bch2_trans_begin(&trans);
@@ -3388,6 +3392,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if ((ret = bkey_err(k)))
goto bkey_err;
+ hole_start = iter.pos.offset;
+ hole_end = bpos_min(k.k->p, end_pos).offset;
+ is_allocation = bkey_extent_is_allocation(k.k);
+
/* already reserved */
if (bkey_extent_is_reservation(k) &&
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
@@ -3401,17 +3409,26 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
continue;
}
- /*
- * XXX: for nocow mode, we should promote shared extents to
- * unshared here
- */
+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+ ret = drop_locks_do(&trans,
+ (bch2_clamp_data_hole(&inode->v,
+ &hole_start,
+ &hole_end,
+ opts.data_replicas), 0));
+ bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
+
+ if (ret)
+ goto bkey_err;
+
+ if (hole_start == hole_end)
+ continue;
+ }
- sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset;
+ sectors = hole_end - hole_start;
- if (!bkey_extent_is_allocation(k.k)) {
+ if (!is_allocation) {
ret = bch2_quota_reservation_add(c, inode,
- &quota_res,
- sectors, true);
+ &quota_res, sectors, true);
if (unlikely(ret))
goto bkey_err;
}
@@ -3423,15 +3440,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err;
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
+
+ drop_locks_do(&trans,
+ (mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
}
- bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
- mark_pagecache_reserved(inode, start_sector, iter.pos.offset);
-
if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
@@ -3679,14 +3696,16 @@ err:
/* fseek: */
-static int folio_data_offset(struct folio *folio, loff_t pos)
+static int folio_data_offset(struct folio *folio, loff_t pos,
+ unsigned min_replicas)
{
struct bch_folio *s = bch2_folio(folio);
unsigned i, sectors = folio_sectors(folio);
if (s)
for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
- if (s->s[i].state >= SECTOR_dirty)
+ if (s->s[i].state >= SECTOR_dirty &&
+ s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
return i << SECTOR_SHIFT;
return -1;
@@ -3694,7 +3713,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos)
static loff_t bch2_seek_pagecache_data(struct inode *vinode,
loff_t start_offset,
- loff_t end_offset)
+ loff_t end_offset,
+ unsigned min_replicas)
{
struct folio_batch fbatch;
pgoff_t start_index = start_offset >> PAGE_SHIFT;
@@ -3713,7 +3733,8 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
folio_lock(folio);
offset = folio_data_offset(folio,
- max(folio_pos(folio), start_offset));
+ max(folio_pos(folio), start_offset),
+ min_replicas);
if (offset >= 0) {
ret = clamp(folio_pos(folio) + offset,
start_offset, end_offset);
@@ -3775,7 +3796,7 @@ err:
if (next_data > offset)
next_data = bch2_seek_pagecache_data(&inode->v,
- offset, next_data);
+ offset, next_data, 0);
if (next_data >= isize)
return -ENXIO;
@@ -3783,7 +3804,8 @@ err:
return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
}
-static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
+static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
+ unsigned min_replicas)
{
struct folio *folio;
struct bch_folio *s;
@@ -3800,7 +3822,8 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
sectors = folio_sectors(folio);
for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
- if (s->s[i].state < SECTOR_dirty) {
+ if (s->s[i].state < SECTOR_dirty ||
+ s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
*offset = max(*offset,
folio_pos(folio) + (i << SECTOR_SHIFT));
goto unlock;
@@ -3815,18 +3838,34 @@ unlock:
static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
loff_t start_offset,
- loff_t end_offset)
+ loff_t end_offset,
+ unsigned min_replicas)
{
struct address_space *mapping = vinode->i_mapping;
loff_t offset = start_offset;
while (offset < end_offset &&
- !folio_hole_offset(mapping, &offset))
+ !folio_hole_offset(mapping, &offset, min_replicas))
;
return min(offset, end_offset);
}
+static void bch2_clamp_data_hole(struct inode *inode,
+ u64 *hole_start,
+ u64 *hole_end,
+ unsigned min_replicas)
+{
+ *hole_start = bch2_seek_pagecache_hole(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+
+ if (*hole_start == *hole_end)
+ return;
+
+ *hole_end = bch2_seek_pagecache_data(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+}
+
static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
@@ -3856,12 +3895,12 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
- offset, MAX_LFS_FILESIZE);
+ offset, MAX_LFS_FILESIZE, 0);
break;
} else if (!bkey_extent_is_data(k.k)) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
max(offset, bkey_start_offset(k.k) << 9),
- k.k->p.offset << 9);
+ k.k->p.offset << 9, 0);
if (next_hole < k.k->p.offset << 9)
break;
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index ddc2782f..7edd4632 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -894,7 +894,7 @@ static int check_inode(struct btree_trans *trans,
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
- if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
+ if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
return 0;
if (!bkey_is_inode(k.k))
@@ -2122,6 +2122,8 @@ int bch2_check_directory_structure(struct bch_fs *c)
return ret;
}
+/* check_nlink pass: */
+
struct nlink_table {
size_t nr;
size_t size;
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 33762e4a..8604df80 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -1078,7 +1078,7 @@ static enum prep_encoded_ret {
/* Can we just write the entire extent as is? */
if (op->crc.uncompressed_size == op->crc.live_size &&
op->crc.compressed_size <= wp->sectors_free &&
- (op->crc.compression_type == op->compression_type ||
+ (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
op->incompressible)) {
if (!crc_is_compressed(op->crc) &&
op->csum_type != op->crc.csum_type &&
@@ -1126,7 +1126,7 @@ static enum prep_encoded_ret {
/*
* If we want to compress the data, it has to be decrypted:
*/
- if ((op->compression_type ||
+ if ((op->compression_opt ||
bch2_csum_type_is_encryption(op->crc.csum_type) !=
bch2_csum_type_is_encryption(op->csum_type)) &&
bch2_write_decrypt(op))
@@ -1173,7 +1173,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
}
if (ec_buf ||
- op->compression_type ||
+ op->compression_opt ||
(op->csum_type &&
!(op->flags & BCH_WRITE_PAGES_STABLE)) ||
(bch2_csum_type_is_encryption(op->csum_type) &&
@@ -1196,16 +1196,16 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
dst->bi_iter.bi_size < c->opts.encoded_extent_max)
break;
- BUG_ON(op->compression_type &&
+ BUG_ON(op->compression_opt &&
(op->flags & BCH_WRITE_DATA_ENCODED) &&
bch2_csum_type_is_encryption(op->crc.csum_type));
- BUG_ON(op->compression_type && !bounce);
+ BUG_ON(op->compression_opt && !bounce);
crc.compression_type = op->incompressible
? BCH_COMPRESSION_TYPE_incompressible
- : op->compression_type
+ : op->compression_opt
? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
- op->compression_type)
+ op->compression_opt)
: 0;
if (!crc_is_compressed(crc)) {
dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index 7a243a5f..1476380d 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -86,7 +86,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
op->written = 0;
op->error = 0;
op->csum_type = bch2_data_checksum_type(c, opts);
- op->compression_type = bch2_compression_opt_to_type[opts.compression];
+ op->compression_opt = opts.compression;
op->nr_replicas = 0;
op->nr_replicas_required = c->opts.data_replicas_required;
op->watermark = BCH_WATERMARK_normal;
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h
index 0fbdfbf9..737f16d7 100644
--- a/libbcachefs/io_types.h
+++ b/libbcachefs/io_types.h
@@ -115,8 +115,8 @@ struct bch_write_op {
u16 flags;
s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
+ unsigned compression_opt:8;
unsigned csum_type:4;
- unsigned compression_type:4;
unsigned nr_replicas:4;
unsigned nr_replicas_required:4;
unsigned watermark:3;
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 0c0c83fa..9dcc61ee 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -5,6 +5,7 @@
#include "bcachefs.h"
#include "compress.h"
#include "disk_groups.h"
+#include "error.h"
#include "opts.h"
#include "super-io.h"
#include "util.h"
@@ -16,6 +17,11 @@ const char * const bch2_error_actions[] = {
NULL
};
+const char * const bch2_fsck_fix_opts[] = {
+ BCH_FIX_ERRORS_OPTS()
+ NULL
+};
+
const char * const bch2_version_upgrade_opts[] = {
BCH_VERSION_UPGRADE_OPTS()
NULL
@@ -89,6 +95,37 @@ const char * const bch2_fs_usage_types[] = {
#undef x
+int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
+ struct printbuf *err)
+{
+ if (!val) {
+ *res = FSCK_FIX_yes;
+ } else {
+ int ret = match_string(bch2_fsck_fix_opts, -1, val);
+
+ if (ret < 0 && err)
+ prt_str(err, "fix_errors: invalid selection");
+ if (ret < 0)
+ return ret;
+ *res = ret;
+ }
+
+ return 0;
+}
+
+void bch2_opt_fix_errors_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ prt_str(out, bch2_fsck_fix_opts[v]);
+}
+
+static const struct bch_opt_fn bch2_opt_fix_errors = {
+ .parse = bch2_opt_fix_errors_parse,
+ .to_text = bch2_opt_fix_errors_to_text,
+};
+
const char * const bch2_d_types[BCH_DT_MAX] = {
[DT_UNKNOWN] = "unknown",
[DT_FIFO] = "fifo",
@@ -167,11 +204,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
.min = _min, .max = _max
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
- .min = 0, .max = ARRAY_SIZE(_choices),\
+ .min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
-#define OPT_FN(_fn) .type = BCH_OPT_FN, \
- .parse = _fn##_parse, \
- .to_text = _fn##_to_text
+#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
[Opt_##_name] = { \
@@ -267,15 +302,26 @@ int bch2_opt_parse(struct bch_fs *c,
switch (opt->type) {
case BCH_OPT_BOOL:
- ret = kstrtou64(val, 10, res);
+ if (val) {
+ ret = kstrtou64(val, 10, res);
+ } else {
+ ret = 0;
+ *res = 1;
+ }
+
if (ret < 0 || (*res != 0 && *res != 1)) {
if (err)
- prt_printf(err, "%s: must be bool",
- opt->attr.name);
+ prt_printf(err, "%s: must be bool", opt->attr.name);
return ret;
}
break;
case BCH_OPT_UINT:
+ if (!val) {
+ prt_printf(err, "%s: required value",
+ opt->attr.name);
+ return -EINVAL;
+ }
+
ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res);
@@ -287,6 +333,12 @@ int bch2_opt_parse(struct bch_fs *c,
}
break;
case BCH_OPT_STR:
+ if (!val) {
+ prt_printf(err, "%s: required value",
+ opt->attr.name);
+ return -EINVAL;
+ }
+
ret = match_string(opt->choices, -1, val);
if (ret < 0) {
if (err)
@@ -298,10 +350,7 @@ int bch2_opt_parse(struct bch_fs *c,
*res = ret;
break;
case BCH_OPT_FN:
- if (!c)
- return 0;
-
- ret = opt->parse(c, val, res);
+ ret = opt->fn.parse(c, val, res, err);
if (ret < 0) {
if (err)
prt_printf(err, "%s: parse error",
@@ -341,10 +390,10 @@ void bch2_opt_to_text(struct printbuf *out,
if (flags & OPT_SHOW_FULL_LIST)
prt_string_option(out, opt->choices, v);
else
- prt_printf(out, "%s", opt->choices[v]);
+ prt_str(out, opt->choices[v]);
break;
case BCH_OPT_FN:
- opt->to_text(out, c, sb, v);
+ opt->fn.to_text(out, c, sb, v);
break;
default:
BUG();
@@ -405,31 +454,19 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
name = strsep(&opt, "=");
val = opt;
- if (val) {
- id = bch2_mount_opt_lookup(name);
- if (id < 0)
- goto bad_opt;
-
- ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
- if (ret < 0)
- goto bad_val;
- } else {
- id = bch2_mount_opt_lookup(name);
- v = 1;
-
- if (id < 0 &&
- !strncmp("no", name, 2)) {
- id = bch2_mount_opt_lookup(name + 2);
- v = 0;
- }
+ id = bch2_mount_opt_lookup(name);
- if (id < 0)
- goto bad_opt;
-
- if (bch2_opt_table[id].type != BCH_OPT_BOOL)
- goto no_val;
+ /* Check for the form "noopt", negation of a boolean opt: */
+ if (id < 0 &&
+ !val &&
+ !strncmp("no", name, 2)) {
+ id = bch2_mount_opt_lookup(name + 2);
+ val = "0";
}
+ if (id < 0)
+ goto bad_opt;
+
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;
@@ -442,6 +479,10 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
!IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
goto bad_opt;
+ ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
+ if (ret < 0)
+ goto bad_val;
+
bch2_opt_set_by_id(opts, id, v);
}
@@ -456,10 +497,6 @@ bad_val:
pr_err("Invalid mount option %s", err.buf);
ret = -1;
goto out;
-no_val:
- pr_err("Mount option %s requires a value", name);
- ret = -1;
- goto out;
out:
kfree(copied_opts_start);
printbuf_exit(&err);
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index e105a742..8a9db110 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -8,7 +8,10 @@
#include <linux/sysfs.h>
#include "bcachefs_format.h"
+struct bch_fs;
+
extern const char * const bch2_error_actions[];
+extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
@@ -67,6 +70,11 @@ enum opt_type {
BCH_OPT_FN,
};
+struct bch_opt_fn {
+ int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
+ void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+};
+
/**
* x(name, shortopt, type, in mem type, mode, sb_opt)
*
@@ -98,6 +106,18 @@ enum opt_type {
#define BCACHEFS_VERBOSE_DEFAULT false
#endif
+#define BCH_FIX_ERRORS_OPTS() \
+ x(exit, 0) \
+ x(yes, 1) \
+ x(no, 2) \
+ x(ask, 3)
+
+enum fsck_err_opts {
+#define x(t, n) FSCK_FIX_##t,
+ BCH_FIX_ERRORS_OPTS()
+#undef x
+};
+
#define BCH_OPTS() \
x(block_size, u16, \
OPT_FS|OPT_FORMAT| \
@@ -154,12 +174,12 @@ enum opt_type {
NULL, NULL) \
x(compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
- OPT_STR(bch2_compression_opts), \
+ OPT_FN(bch2_opt_compression), \
BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \
NULL, NULL) \
x(background_compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
- OPT_STR(bch2_compression_opts), \
+ OPT_FN(bch2_opt_compression), \
BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \
NULL, NULL) \
x(str_hash, u8, \
@@ -318,8 +338,8 @@ enum opt_type {
NULL, "Run fsck on mount") \
x(fix_errors, u8, \
OPT_FS|OPT_MOUNT, \
- OPT_BOOL(), \
- BCH2_NO_SB_OPT, false, \
+ OPT_FN(bch2_opt_fix_errors), \
+ BCH2_NO_SB_OPT, FSCK_FIX_exit, \
NULL, "Fix errors during fsck without asking") \
x(ratelimit_errors, u8, \
OPT_FS|OPT_MOUNT, \
@@ -495,8 +515,8 @@ struct bch_option {
u64 min, max;
const char * const *choices;
- int (*parse)(struct bch_fs *, const char *, u64 *);
- void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+
+ struct bch_opt_fn fn;
const char *hint;
const char *help;
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 989f37a3..c3d57723 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -5,6 +5,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
+#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "extents.h"
@@ -45,7 +46,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached &&
p.crc.compression_type !=
- bch2_compression_opt_to_type[io_opts->background_compression])
+ bch2_compression_opt_to_type(io_opts->background_compression))
data_opts->rewrite_ptrs |= 1U << i;
i++;
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index b04e1565..63b385d8 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -1162,12 +1162,9 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, " incomplete\n");
}
- prt_str(&buf, "Doing ");
- if (BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version))
- prt_str(&buf, "incompatible");
- else
- prt_str(&buf, "compatible");
- prt_str(&buf, "version upgrade from ");
+ prt_printf(&buf, "Doing %s version upgrade from ",
+ BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
+ ? "incompatible" : "compatible");
bch2_version_to_text(&buf, old_version);
prt_str(&buf, " to ");
bch2_version_to_text(&buf, new_version);
@@ -1178,7 +1175,7 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, "fsck required");
c->recovery_passes_explicit |= recovery_passes;
- c->opts.fix_errors = FSCK_OPT_YES;
+ c->opts.fix_errors = FSCK_FIX_yes;
}
bch_info(c, "%s", buf.buf);
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index f118e585..c15b4781 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -12,9 +12,9 @@
static int bch2_subvolume_delete(struct btree_trans *, u32);
-static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
+static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
- struct snapshot_t *s = snapshot_t(c, id);
+ const struct snapshot_t *s = __snapshot_t(t, id);
if (s->skip[2] <= ancestor)
return s->skip[2];
@@ -27,22 +27,83 @@ static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
+ struct snapshot_table *t;
+
EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
+ rcu_read_lock();
+ t = rcu_dereference(c->snapshots);
+
while (id && id < ancestor)
- id = get_ancestor_below(c, id, ancestor);
+ id = get_ancestor_below(t, id, ancestor);
+ rcu_read_unlock();
return id == ancestor;
}
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
+ struct snapshot_table *t;
+
+ rcu_read_lock();
+ t = rcu_dereference(c->snapshots);
+
while (id && id < ancestor)
- id = snapshot_t(c, id)->parent;
+ id = __snapshot_t(t, id)->parent;
+ rcu_read_unlock();
return id == ancestor;
}
+static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
+{
+ u32 depth;
+
+ rcu_read_lock();
+ depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
+ rcu_read_unlock();
+
+ return depth;
+}
+
+static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
+{
+ size_t idx = U32_MAX - id;
+ size_t new_size;
+ struct snapshot_table *new, *old;
+
+ new_size = max(16UL, roundup_pow_of_two(idx + 1));
+
+ new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ old = c->snapshots;
+ if (old)
+ memcpy(new->s,
+ rcu_dereference_protected(c->snapshots, true)->s,
+ sizeof(new->s[0]) * c->snapshot_table_size);
+
+ rcu_assign_pointer(c->snapshots, new);
+ c->snapshot_table_size = new_size;
+ if (old)
+ kvfree_rcu(old);
+
+ return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+}
+
+static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
+{
+ size_t idx = U32_MAX - id;
+
+ lockdep_assert_held(&c->snapshot_table_lock);
+
+ if (likely(idx < c->snapshot_table_size))
+ return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+
+ return __snapshot_t_mut(c, id);
+}
+
/* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@@ -202,12 +263,15 @@ int bch2_mark_snapshot(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct snapshot_t *t;
+ int ret = 0;
+
+ mutex_lock(&c->snapshot_table_lock);
- t = genradix_ptr_alloc(&c->snapshots,
- U32_MAX - new.k->p.offset,
- GFP_KERNEL);
- if (!t)
- return -BCH_ERR_ENOMEM_mark_snapshot;
+ t = snapshot_t_mut(c, new.k->p.offset);
+ if (!t) {
+ ret = -BCH_ERR_ENOMEM_mark_snapshot;
+ goto err;
+ }
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
@@ -231,8 +295,9 @@ int bch2_mark_snapshot(struct btree_trans *trans,
t->subvol = 0;
t->tree = 0;
}
-
- return 0;
+err:
+ mutex_unlock(&c->snapshot_table_lock);
+ return ret;
}
static int snapshot_lookup(struct btree_trans *trans, u32 id,
@@ -285,9 +350,14 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
nr_live += ret;
}
- snapshot_t(c, id)->equiv = nr_live == 1
- ? snapshot_t(c, child[live_idx])->equiv
+ mutex_lock(&c->snapshot_table_lock);
+
+ snapshot_t_mut(c, id)->equiv = nr_live == 1
+ ? snapshot_t_mut(c, child[live_idx])->equiv
: id;
+
+ mutex_unlock(&c->snapshot_table_lock);
+
return 0;
}
@@ -505,16 +575,18 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
{
- struct snapshot_t *s;
+ const struct snapshot_t *s;
if (!id)
return 0;
+ rcu_read_lock();
s = snapshot_t(c, id);
- if (!s->parent)
- return id;
+ if (s->parent)
+ id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
+ rcu_read_unlock();
- return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
+ return id;
}
static int snapshot_rand_ancestor_good(struct btree_trans *trans,
@@ -613,9 +685,7 @@ static int check_snapshot(struct btree_trans *trans,
struct bch_snapshot v;
struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
- struct snapshot_t *parent = parent_id
- ? snapshot_t(c, parent_id)
- : NULL;
+ u32 real_depth;
struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id;
@@ -706,16 +776,18 @@ static int check_snapshot(struct btree_trans *trans,
}
ret = 0;
- if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c,
+ real_depth = bch2_snapshot_depth(c, parent_id);
+
+ if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c,
"snapshot with incorrect depth fields, should be %u:\n %s",
- parent->depth + 1,
+ real_depth,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
- u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0);
+ u->v.depth = cpu_to_le32(real_depth);
s = u->v;
}
@@ -799,9 +871,13 @@ static int check_subvol(struct btree_trans *trans,
if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
- u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
+ u32 snapshot_tree;
struct bch_snapshot_tree st;
+ rcu_read_lock();
+ snapshot_tree = snapshot_t(c, snapshot_root)->tree;
+ rcu_read_unlock();
+
ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
@@ -845,7 +921,7 @@ int bch2_check_subvols(struct bch_fs *c)
void bch2_fs_snapshots_exit(struct bch_fs *c)
{
- genradix_free(&c->snapshots);
+ kfree(c->snapshots);
}
int bch2_snapshots_read(struct bch_fs *c)
@@ -987,7 +1063,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
struct bkey_i_snapshot *n;
struct bkey_s_c k;
unsigned i, j;
- u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
+ u32 depth = bch2_snapshot_depth(c, parent);
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@@ -1126,7 +1202,7 @@ static int snapshot_delete_key(struct btree_trans *trans,
struct bpos *last_pos)
{
struct bch_fs *c = trans->c;
- u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0;
diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h
index ab0b4a6d..6d134dab 100644
--- a/libbcachefs/subvolume.h
+++ b/libbcachefs/subvolume.h
@@ -32,17 +32,31 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
.min_val_size = 24, \
})
-static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
+static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
- return genradix_ptr(&c->snapshots, U32_MAX - id);
+ return &t->s[U32_MAX - id];
}
-static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
+{
+ return __snapshot_t(rcu_dereference(c->snapshots), id);
+}
+
+static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
return snapshot_t(c, id)->parent;
}
-static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_parent_early(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
#ifdef CONFIG_BCACHEFS_DEBUG
u32 parent = snapshot_t(c, id)->parent;
@@ -59,10 +73,21 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
#endif
}
+static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_parent(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
+ rcu_read_lock();
while (n--)
- id = bch2_snapshot_parent(c, id);
+ id = __bch2_snapshot_parent(c, id);
+ rcu_read_unlock();
return id;
}
@@ -71,37 +96,60 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{
u32 parent;
- while ((parent = bch2_snapshot_parent(c, id)))
+ rcu_read_lock();
+ while ((parent = __bch2_snapshot_parent(c, id)))
id = parent;
+ rcu_read_unlock();
+
return id;
}
-static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
return snapshot_t(c, id)->equiv;
}
+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_equiv(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
{
- return id == snapshot_t(c, id)->equiv;
+ return id == bch2_snapshot_equiv(c, id);
}
-static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
+static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
- struct snapshot_t *s = snapshot_t(c, id);
+ const struct snapshot_t *s;
+ bool ret;
+
+ rcu_read_lock();
+ s = snapshot_t(c, id);
+ ret = s->children[0];
+ rcu_read_unlock();
+
+ return ret;
+}
- return s->children[0] || s->children[1];
+static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
+{
+ return !bch2_snapshot_is_internal_node(c, id);
}
static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
{
- struct snapshot_t *s;
- u32 parent = bch2_snapshot_parent(c, id);
+ const struct snapshot_t *s;
+ u32 parent = __bch2_snapshot_parent(c, id);
if (!parent)
return 0;
- s = snapshot_t(c, bch2_snapshot_parent(c, id));
+ s = snapshot_t(c, __bch2_snapshot_parent(c, id));
if (id == s->children[0])
return s->children[1];
if (id == s->children[1])
@@ -113,9 +161,15 @@ bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
- struct snapshot_t *t = snapshot_t(c, id);
+ const struct snapshot_t *t;
+ bool ret;
- return (t->children[0]|t->children[1]) != 0;
+ rcu_read_lock();
+ t = snapshot_t(c, id);
+ ret = (t->children[0]|t->children[1]) != 0;
+ rcu_read_unlock();
+
+ return ret;
}
static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/subvolume_types.h
index 750d975a..c596e427 100644
--- a/libbcachefs/subvolume_types.h
+++ b/libbcachefs/subvolume_types.h
@@ -16,6 +16,10 @@ struct snapshot_t {
u32 equiv;
};
+struct snapshot_table {
+ struct snapshot_t s[0];
+};
+
typedef struct {
u32 subvol;
u64 inum;
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 35325381..e9ce3f33 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -658,11 +658,18 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
struct printbuf err = PRINTBUF;
__le64 *i;
int ret;
-
+#ifndef __KERNEL__
+retry:
+#endif
memset(sb, 0, sizeof(*sb));
sb->mode = FMODE_READ;
sb->have_bio = true;
+#ifndef __KERNEL__
+ if (opt_get(*opts, direct_io) == false)
+ sb->mode |= FMODE_BUFFERED;
+#endif
+
if (!opt_get(*opts, noexcl))
sb->mode |= FMODE_EXCL;
@@ -747,7 +754,13 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
got_super:
if (le16_to_cpu(sb->sb->block_size) << 9 <
- bdev_logical_block_size(sb->bdev)) {
+ bdev_logical_block_size(sb->bdev) &&
+ opt_get(*opts, direct_io)) {
+#ifndef __KERNEL__
+ opt_set(*opts, direct_io, false);
+ bch2_free_super(sb);
+ goto retry;
+#endif
prt_printf(&err, "block size (%u) smaller than device block size (%u)",
le16_to_cpu(sb->sb->block_size) << 9,
bdev_logical_block_size(sb->bdev));
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 426d2acf..9f1047a7 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -344,6 +344,19 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ *
+ * Ideally we'd start copygc/rebalance earlier instead of waiting for
+ * all of recovery/fsck to complete:
+ */
+ ret = bch2_copygc_start(c);
+ if (ret) {
+ bch_err(c, "error starting copygc thread");
+ return ret;
+ }
+
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
@@ -403,12 +416,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
return ret;
}
- ret = bch2_copygc_start(c);
- if (ret) {
- bch_err(c, "error starting copygc thread");
- return ret;
- }
-
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index ca1b799e..5fa29dab 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -64,7 +64,7 @@ static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask)
{
return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN,
get_order(size)) ?:
- __vmalloc(size, gfp_mask);
+ __vmalloc_noprof(size, gfp_mask);
}
#define vpmalloc(_size, _gfp) alloc_hooks(vpmalloc_noprof(_size, _gfp))
diff --git a/linux/blkdev.c b/linux/blkdev.c
index 45b03fba..ea901a46 100644
--- a/linux/blkdev.c
+++ b/linux/blkdev.c
@@ -183,16 +183,19 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
else if (mode & FMODE_WRITE)
flags = O_WRONLY;
+ if (!(mode & FMODE_BUFFERED))
+ flags |= O_DIRECT;
+
#if 0
/* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
if (mode & FMODE_EXCL)
flags |= O_EXCL;
#endif
- buffered_fd = open(path, flags);
+ buffered_fd = open(path, flags & ~O_DIRECT);
if (buffered_fd < 0)
return ERR_PTR(-errno);
- fd = open(path, flags|O_DIRECT);
+ fd = open(path, flags);
if (fd < 0)
fd = dup(buffered_fd);
if (fd < 0) {
@@ -200,9 +203,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
return ERR_PTR(-errno);
}
- sync_fd = open(path, flags|O_DIRECT|O_SYNC);
+ sync_fd = open(path, flags|O_SYNC);
if (sync_fd < 0)
- sync_fd = open(path, flags|O_SYNC);
+ sync_fd = open(path, (flags & ~O_DIRECT)|O_SYNC);
if (sync_fd < 0) {
close(fd);
close(buffered_fd);
diff --git a/rust-src/src/cmd_list.rs b/rust-src/src/cmd_list.rs
index 2dc8d719..3f86b8cd 100644
--- a/rust-src/src/cmd_list.rs
+++ b/rust-src/src/cmd_list.rs
@@ -138,7 +138,7 @@ fn cmd_list_inner(opt: Cli) -> anyhow::Result<()> {
opt_set!(fs_opts, errors, bcachefs::bch_error_actions::BCH_ON_ERROR_continue as u8);
if opt.fsck {
- opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_OPT_YES as u8);
+ opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_FIX_yes as u8);
opt_set!(fs_opts, norecovery, 0);
}