summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Hutchings <ben@decadent.org.uk>2018-09-16 16:49:03 +0100
committerBen Hutchings <ben@decadent.org.uk>2018-09-16 16:49:03 +0100
commit58b9e960c0380f6ccb913c02968bbaed905bce82 (patch)
tree6b416715a8f50a5f8c71e3e988432670a4323421
parent1224dd70227c2c1ce5fa46c86d473bc89ee2cc0c (diff)
downloadlinux-stable-queue-58b9e960c0380f6ccb913c02968bbaed905bce82.tar.gz
Add more security fixes
-rw-r--r--queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch60
-rw-r--r--queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch54
-rw-r--r--queue-3.16/series6
-rw-r--r--queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch29
-rw-r--r--queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch182
-rw-r--r--queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch41
-rw-r--r--queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch155
7 files changed, 527 insertions, 0 deletions
diff --git a/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch b/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch
new file mode 100644
index 00000000..805a6cd0
--- /dev/null
+++ b/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch
@@ -0,0 +1,60 @@
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 3 Jul 2018 17:10:07 +0800
+Subject: btrfs: relocation: Only remove reloc rb_trees if reloc control has
+ been initialized
+
+commit 389305b2aa68723c754f88d9dbd268a400e10664 upstream.
+
+Invalid reloc tree can cause kernel NULL pointer dereference when btrfs
+does some cleanup of the reloc roots.
+
+It turns out that fs_info::reloc_ctl can be NULL in
+btrfs_recover_relocation() as we allocate relocation control after all
+reloc roots have been verified.
+So when we hit an invalid reloc root there, we haven't yet called
+set_reloc_control(), thus fs_info::reloc_ctl is still NULL.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199833
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/btrfs/relocation.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1311,18 +1311,19 @@ static void __del_reloc_root(struct btrf
+ struct mapping_node *node = NULL;
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+- spin_lock(&rc->reloc_root_tree.lock);
+- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+- root->node->start);
+- if (rb_node) {
+- node = rb_entry(rb_node, struct mapping_node, rb_node);
+- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
++ if (rc) {
++ spin_lock(&rc->reloc_root_tree.lock);
++ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
++ root->node->start);
++ if (rb_node) {
++ node = rb_entry(rb_node, struct mapping_node, rb_node);
++ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
++ }
++ spin_unlock(&rc->reloc_root_tree.lock);
++ if (!node)
++ return;
++ BUG_ON((struct btrfs_root *)node->data != root);
+ }
+- spin_unlock(&rc->reloc_root_tree.lock);
+-
+- if (!node)
+- return;
+- BUG_ON((struct btrfs_root *)node->data != root);
+
+ spin_lock(&root->fs_info->trans_lock);
+ list_del_init(&root->root_list);
diff --git a/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch b/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch
new file mode 100644
index 00000000..25546deb
--- /dev/null
+++ b/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch
@@ -0,0 +1,54 @@
+From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
+ <ernesto.mnd.fernandez@gmail.com>
+Date: Thu, 23 Aug 2018 17:00:25 -0700
+Subject: hfsplus: fix NULL dereference in hfsplus_lookup()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit a7ec7a4193a2eb3b5341243fc0b621c1ac9e4ec4 upstream.
+
+An HFS+ filesystem can be mounted read-only without having a metadata
+directory, which is needed to support hardlinks. But if the catalog
+data is corrupted, a directory lookup may still find dentries claiming
+to be hardlinks.
+
+hfsplus_lookup() does check that ->hidden_dir is not NULL in such a
+situation, but mistakenly does so after dereferencing it for the first
+time. Reorder this check to prevent a crash.
+
+This happens when looking up corrupted catalog data (dentry) on a
+filesystem with no metadata directory (this could only ever happen on a
+read-only mount). Wen Xu sent the replication steps in detail to the
+fsdevel list: https://bugzilla.kernel.org/show_bug.cgi?id=200297
+
+Link: http://lkml.kernel.org/r/20180712215344.q44dyrhymm4ajkao@eaf
+Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Cc: Viacheslav Dubeyko <slava@dubeyko.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/hfsplus/dir.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/hfsplus/dir.c
++++ b/fs/hfsplus/dir.c
+@@ -74,13 +74,13 @@ again:
+ cpu_to_be32(HFSP_HARDLINK_TYPE) &&
+ entry.file.user_info.fdCreator ==
+ cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
++ HFSPLUS_SB(sb)->hidden_dir &&
+ (entry.file.create_date ==
+ HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
+ create_date ||
+ entry.file.create_date ==
+ HFSPLUS_I(sb->s_root->d_inode)->
+- create_date) &&
+- HFSPLUS_SB(sb)->hidden_dir) {
++ create_date)) {
+ struct qstr str;
+ char name[32];
+
diff --git a/queue-3.16/series b/queue-3.16/series
index b7a0fc46..7129c88b 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -45,3 +45,9 @@ x86-speculation-protect-against-userspace-userspace-spectrersb.patch
x86-paravirt-fix-spectre-v2-mitigations-for-paravirt-guests.patch
cdrom-fix-info-leak-oob-read-in-cdrom_ioctl_drive_status.patch
uas-replace-warn_on_once-with-lockdep_assert_held.patch
+video-uvesafb-fix-integer-overflow-in-allocation.patch
+btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch
+hfsplus-fix-null-dereference-in-hfsplus_lookup.patch
+xfs-catch-inode-allocation-state-mismatch-corruption.patch
+xfs-validate-cached-inodes-are-free-when-allocated.patch
+xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch
diff --git a/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch b/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch
new file mode 100644
index 00000000..02715149
--- /dev/null
+++ b/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch
@@ -0,0 +1,29 @@
+From: Kees Cook <keescook@chromium.org>
+Date: Fri, 11 May 2018 18:24:12 +1000
+Subject: video: uvesafb: Fix integer overflow in allocation
+
+commit 9f645bcc566a1e9f921bdae7528a01ced5bc3713 upstream.
+
+cmap->len can get close to INT_MAX/2, allowing for an integer overflow in
+allocation. This uses kmalloc_array() instead to catch the condition.
+
+Reported-by: Dr Silvio Cesare of InfoSect <silvio.cesare@gmail.com>
+Fixes: 8bdb3a2d7df48 ("uvesafb: the driver core")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ drivers/video/fbdev/uvesafb.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/fbdev/uvesafb.c
++++ b/drivers/video/fbdev/uvesafb.c
+@@ -1059,7 +1059,8 @@ static int uvesafb_setcmap(struct fb_cma
+ info->cmap.len || cmap->start < info->cmap.start)
+ return -EINVAL;
+
+- entries = kmalloc(sizeof(*entries) * cmap->len, GFP_KERNEL);
++ entries = kmalloc_array(cmap->len, sizeof(*entries),
++ GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
diff --git a/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch b/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch
new file mode 100644
index 00000000..44b7783f
--- /dev/null
+++ b/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch
@@ -0,0 +1,182 @@
+From: Dave Chinner <dchinner@redhat.com>
+Date: Fri, 23 Mar 2018 10:22:53 -0700
+Subject: xfs: catch inode allocation state mismatch corruption
+
+commit ee457001ed6c6f31ddad69c24c1da8f377d8472d upstream.
+
+We recently came across a V4 filesystem causing memory corruption
+due to a newly allocated inode being setup twice and being added to
+the superblock inode list twice. From code inspection, the only way
+this could happen is if a newly allocated inode was not marked as
+free on disk (i.e. di_mode wasn't zero).
+
+Running the metadump on an upstream debug kernel fails during inode
+allocation like so:
+
+XFS: Assertion failed: ip->i_d.di_nblocks == 0,
+file: fs/xfs/xfs_inode.c, line: 838
+ ------------[ cut here ]------------
+kernel BUG at fs/xfs/xfs_message.c:114!
+invalid opcode: 0000 [#1] PREEMPT SMP
+CPU: 11 PID: 3496 Comm: mkdir Not tainted 4.16.0-rc5-dgc #442
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1
+04/01/2014
+RIP: 0010:assfail+0x28/0x30
+RSP: 0018:ffffc9000236fc80 EFLAGS: 00010202
+RAX: 00000000ffffffea RBX: 0000000000004000 RCX: 0000000000000000
+RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff8227211b
+RBP: ffffc9000236fce8 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000bec R11: f000000000000000 R12: ffffc9000236fd30
+R13: ffff8805c76bab80 R14: ffff8805c77ac800 R15: ffff88083fb12e10
+FS: 00007fac8cbff040(0000) GS:ffff88083fd00000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fffa6783ff8 CR3: 00000005c6e2b003 CR4: 00000000000606e0
+Call Trace:
+ xfs_ialloc+0x383/0x570
+ xfs_dir_ialloc+0x6a/0x2a0
+ xfs_create+0x412/0x670
+ xfs_generic_create+0x1f7/0x2c0
+ ? capable_wrt_inode_uidgid+0x3f/0x50
+ vfs_mkdir+0xfb/0x1b0
+ SyS_mkdir+0xcf/0xf0
+ do_syscall_64+0x73/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x42/0xb7
+
+Extracting the inode number we crashed on from an event trace and
+looking at it with xfs_db:
+
+xfs_db> inode 184452204
+xfs_db> p
+core.magic = 0x494e
+core.mode = 0100644
+core.version = 2
+core.format = 2 (extents)
+core.nlinkv2 = 1
+core.onlink = 0
+.....
+
+Confirms that it is not a free inode on disk. xfs_repair
+also trips over this inode:
+
+.....
+zero length extent (off = 0, fsbno = 0) in ino 184452204
+correcting nextents for inode 184452204
+bad attribute fork in inode 184452204, would clear attr fork
+bad nblocks 1 for inode 184452204, would reset to 0
+bad anextents 1 for inode 184452204, would reset to 0
+imap claims in-use inode 184452204 is free, would correct imap
+would have cleared inode 184452204
+.....
+disconnected inode 184452204, would move to lost+found
+
+And so we have a situation where the directory structure and the
+inobt thinks the inode is free, but the inode on disk thinks it is
+still in use. Where this corruption came from is not possible to
+diagnose, but we can detect it and prevent the kernel from oopsing
+on lookup. The reproducer now results in:
+
+$ sudo mkdir /mnt/scratch/{0,1,2,3,4,5}{0,1,2,3,4,5}
+mkdir: cannot create directory ‘/mnt/scratch/00’:
+File exists
+mkdir: cannot create directory ‘/mnt/scratch/01’:
+File exists
+mkdir: cannot create directory ‘/mnt/scratch/03’:
+Structure needs cleaning
+mkdir: cannot create directory ‘/mnt/scratch/04’:
+Input/output error
+mkdir: cannot create directory ‘/mnt/scratch/05’:
+Input/output error
+....
+
+And this corruption shutdown:
+
+[ 54.843517] XFS (loop0): Corruption detected! Free inode 0xafe846c not
+marked free on disk
+[ 54.845885] XFS (loop0): Internal error xfs_trans_cancel at line 1023
+of file fs/xfs/xfs_trans.c. Caller xfs_create+0x425/0x670
+[ 54.848994] CPU: 10 PID: 3541 Comm: mkdir Not tainted 4.16.0-rc5-dgc #=
+443
+[ 54.850753] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO=
+S 1.10.2-1 04/01/2014
+[ 54.852859] Call Trace:
+[ 54.853531] dump_stack+0x85/0xc5
+[ 54.854385] xfs_trans_cancel+0x197/0x1c0
+[ 54.855421] xfs_create+0x425/0x670
+[ 54.856314] xfs_generic_create+0x1f7/0x2c0
+[ 54.857390] ? capable_wrt_inode_uidgid+0x3f/0x50
+[ 54.858586] vfs_mkdir+0xfb/0x1b0
+[ 54.859458] SyS_mkdir+0xcf/0xf0
+[ 54.860254] do_syscall_64+0x73/0x1a0
+[ 54.861193] entry_SYSCALL_64_after_hwframe+0x42/0xb7
+[ 54.862492] RIP: 0033:0x7fb73bddf547
+[ 54.863358] RSP: 002b:00007ffdaa553338 EFLAGS: 00000246 ORIG_RAX: 0000=
+000000000053
+[ 54.865133] RAX: ffffffffffffffda RBX: 00007ffdaa55449a RCX: 00007fb73=
+bddf547
+[ 54.866766] RDX: 0000000000000001 RSI: 00000000000001ff RDI: 00007ffda=
+a55449a
+[ 54.868432] RBP: 00007ffdaa55449a R08: 00000000000001ff R09: 00005623a=
+8670dd0
+[ 54.870110] R10: 00007fb73be72d5b R11: 0000000000000246 R12: 000000000=
+00001ff
+[ 54.871752] R13: 00007ffdaa5534b0 R14: 0000000000000000 R15: 00007ffda=
+a553500
+[ 54.873429] XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1=
+024 of file fs/xfs/xfs_trans.c. Return address = ffffffff814cd050
+[ 54.882790] XFS (loop0): Corruption of in-memory data detected. Shutt=
+ing down filesystem
+[ 54.884597] XFS (loop0): Please umount the filesystem and rectify the =
+problem(s)
+
+Note that this crash is only possible on v4 filesystems or v5
+filesystems mounted with the ikeep mount option. For all other V5
+filesystems, this problem cannot occur because we don't read inodes
+we are allocating from disk - we simply overwrite them with the new
+inode information.
+
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Tested-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+[bwh: Backported to 3.16:
+ - Look up mode in XFS inode, not VFS inode
+ - Use positive error codes, and EIO instead of EFSCORRUPTED]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/xfs/xfs_icache.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -293,7 +293,28 @@ xfs_iget_cache_miss(
+
+ trace_xfs_iget_miss(ip);
+
+- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
++
++ /*
++ * If we are allocating a new inode, then check what was returned is
++ * actually a free, empty inode. If we are not allocating an inode,
++ * the check we didn't find a free inode.
++ */
++ if (flags & XFS_IGET_CREATE) {
++ if (ip->i_d.di_mode != 0) {
++ xfs_warn(mp,
++"Corruption detected! Free inode 0x%llx not marked free on disk",
++ ino);
++ error = EIO;
++ goto out_destroy;
++ }
++ if (ip->i_d.di_nblocks != 0) {
++ xfs_warn(mp,
++"Corruption detected! Free inode 0x%llx has blocks allocated!",
++ ino);
++ error = EIO;
++ goto out_destroy;
++ }
++ } else if (ip->i_d.di_mode == 0) {
+ error = ENOENT;
+ goto out_destroy;
+ }
diff --git a/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch b/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch
new file mode 100644
index 00000000..096549de
--- /dev/null
+++ b/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch
@@ -0,0 +1,41 @@
+From: Eric Sandeen <sandeen@sandeen.net>
+Date: Fri, 8 Jun 2018 09:53:49 -0700
+Subject: xfs: don't call xfs_da_shrink_inode with NULL bp
+
+commit bb3d48dcf86a97dc25fe9fc2c11938e19cb4399a upstream.
+
+xfs_attr3_leaf_create may have errored out before instantiating a buffer,
+for example if the blkno is out of range. In that case there is no work
+to do to remove it, and in fact xfs_da_shrink_inode will lead to an oops
+if we try.
+
+This also seems to fix a flaw where the original error from
+xfs_attr3_leaf_create gets overwritten in the cleanup case, and it
+removes a pointless assignment to bp which isn't used after this.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199969
+Reported-by: Xu, Wen <wen.xu@gatech.edu>
+Tested-by: Xu, Wen <wen.xu@gatech.edu>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+[bwh: Backported to 3.16: adjust filename]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/xfs/xfs_attr_leaf.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_attr_leaf.c
++++ b/fs/xfs/xfs_attr_leaf.c
+@@ -701,9 +701,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t
+ ASSERT(blkno == 0);
+ error = xfs_attr3_leaf_create(args, blkno, &bp);
+ if (error) {
+- error = xfs_da_shrink_inode(args, 0, bp);
+- bp = NULL;
+- if (error)
++ /* xfs_attr3_leaf_create may not have instantiated a block */
++ if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
+ goto out;
+ xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
+ memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
diff --git a/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch b/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch
new file mode 100644
index 00000000..e758bb09
--- /dev/null
+++ b/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch
@@ -0,0 +1,155 @@
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 17 Apr 2018 17:17:34 -0700
+Subject: xfs: validate cached inodes are free when allocated
+
+commit afca6c5b2595fc44383919fba740c194b0b76aff upstream.
+
+A recent fuzzed filesystem image caused random dcache corruption
+when the reproducer was run. This often showed up as panics in
+lookup_slow() on a null inode->i_ops pointer when doing pathwalks.
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+....
+Call Trace:
+ lookup_slow+0x44/0x60
+ walk_component+0x3dd/0x9f0
+ link_path_walk+0x4a7/0x830
+ path_lookupat+0xc1/0x470
+ filename_lookup+0x129/0x270
+ user_path_at_empty+0x36/0x40
+ path_listxattr+0x98/0x110
+ SyS_listxattr+0x13/0x20
+ do_syscall_64+0xf5/0x280
+ entry_SYSCALL_64_after_hwframe+0x42/0xb7
+
+but had many different failure modes including deadlocks trying to
+lock the inode that was just allocated or KASAN reports of
+use-after-free violations.
+
+The cause of the problem was a corrupt INOBT on a v4 fs where the
+root inode was marked as free in the inobt record. Hence when we
+allocated an inode, it chose the root inode to allocate, found it in
+the cache and re-initialised it.
+
+We recently fixed a similar inode allocation issue caused by inobt
+record corruption problem in xfs_iget_cache_miss() in commit
+ee457001ed6c ("xfs: catch inode allocation state mismatch
+corruption"). This change adds similar checks to the cache-hit path
+to catch it, and turns the reproducer into a corruption shutdown
+situation.
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-Off-By: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix typos in comment]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+[bwh: Backported to 3.16:
+ - Look up mode in XFS inode, not VFS inode
+ - Use positive error codes, and EIO instead of EFSCORRUPTED]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/xfs/xfs_icache.c | 73 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 48 insertions(+), 25 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -133,6 +133,46 @@ xfs_inode_free(
+ }
+
+ /*
++ * If we are allocating a new inode, then check what was returned is
++ * actually a free, empty inode. If we are not allocating an inode,
++ * then check we didn't find a free inode.
++ *
++ * Returns:
++ * 0 if the inode free state matches the lookup context
++ * ENOENT if the inode is free and we are not allocating
++ * EFSCORRUPTED if there is any state mismatch at all
++ */
++static int
++xfs_iget_check_free_state(
++ struct xfs_inode *ip,
++ int flags)
++{
++ if (flags & XFS_IGET_CREATE) {
++ /* should be a free inode */
++ if (ip->i_d.di_mode != 0) {
++ xfs_warn(ip->i_mount,
++"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
++ ip->i_ino, ip->i_d.di_mode);
++ return EIO;
++ }
++
++ if (ip->i_d.di_nblocks != 0) {
++ xfs_warn(ip->i_mount,
++"Corruption detected! Free inode 0x%llx has blocks allocated!",
++ ip->i_ino);
++ return EIO;
++ }
++ return 0;
++ }
++
++ /* should be an allocated inode */
++ if (ip->i_d.di_mode == 0)
++ return ENOENT;
++
++ return 0;
++}
++
++/*
+ * Check the validity of the inode we just found it the cache
+ */
+ static int
+@@ -181,12 +221,12 @@ xfs_iget_cache_hit(
+ }
+
+ /*
+- * If lookup is racing with unlink return an error immediately.
++ * Check the inode free state is valid. This also detects lookup
++ * racing with unlinks.
+ */
+- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+- error = ENOENT;
++ error = xfs_iget_check_free_state(ip, flags);
++ if (error)
+ goto out_error;
+- }
+
+ /*
+ * If IRECLAIMABLE is set, we've torn down the VFS inode already.
+@@ -295,29 +335,12 @@ xfs_iget_cache_miss(
+
+
+ /*
+- * If we are allocating a new inode, then check what was returned is
+- * actually a free, empty inode. If we are not allocating an inode,
+- * the check we didn't find a free inode.
++ * Check the inode free state is valid. This also detects lookup
++ * racing with unlinks.
+ */
+- if (flags & XFS_IGET_CREATE) {
+- if (ip->i_d.di_mode != 0) {
+- xfs_warn(mp,
+-"Corruption detected! Free inode 0x%llx not marked free on disk",
+- ino);
+- error = EIO;
+- goto out_destroy;
+- }
+- if (ip->i_d.di_nblocks != 0) {
+- xfs_warn(mp,
+-"Corruption detected! Free inode 0x%llx has blocks allocated!",
+- ino);
+- error = EIO;
+- goto out_destroy;
+- }
+- } else if (ip->i_d.di_mode == 0) {
+- error = ENOENT;
++ error = xfs_iget_check_free_state(ip, flags);
++ if (error)
+ goto out_destroy;
+- }
+
+ /*
+ * Preload the radix tree so we can insert safely under the