diff options
author | Ben Hutchings <ben@decadent.org.uk> | 2018-09-16 16:49:03 +0100 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2018-09-16 16:49:03 +0100 |
commit | 58b9e960c0380f6ccb913c02968bbaed905bce82 (patch) | |
tree | 6b416715a8f50a5f8c71e3e988432670a4323421 | |
parent | 1224dd70227c2c1ce5fa46c86d473bc89ee2cc0c (diff) | |
download | linux-stable-queue-58b9e960c0380f6ccb913c02968bbaed905bce82.tar.gz |
Add more security fixes
7 files changed, 527 insertions, 0 deletions
diff --git a/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch b/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch new file mode 100644 index 00000000..805a6cd0 --- /dev/null +++ b/queue-3.16/btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch @@ -0,0 +1,60 @@ +From: Qu Wenruo <wqu@suse.com> +Date: Tue, 3 Jul 2018 17:10:07 +0800 +Subject: btrfs: relocation: Only remove reloc rb_trees if reloc control has + been initialized + +commit 389305b2aa68723c754f88d9dbd268a400e10664 upstream. + +Invalid reloc tree can cause kernel NULL pointer dereference when btrfs +does some cleanup of the reloc roots. + +It turns out that fs_info::reloc_ctl can be NULL in +btrfs_recover_relocation() as we allocate relocation control after all +reloc roots have been verified. +So when we hit: note, we haven't called set_reloc_control() thus +fs_info::reloc_ctl is still NULL. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199833 +Reported-by: Xu Wen <wen.xu@gatech.edu> +Signed-off-by: Qu Wenruo <wqu@suse.com> +Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com> +Reviewed-by: David Sterba <dsterba@suse.com> +Signed-off-by: David Sterba <dsterba@suse.com> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/btrfs/relocation.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -1311,18 +1311,19 @@ static void __del_reloc_root(struct btrf + struct mapping_node *node = NULL; + struct reloc_control *rc = root->fs_info->reloc_ctl; + +- spin_lock(&rc->reloc_root_tree.lock); +- rb_node = tree_search(&rc->reloc_root_tree.rb_root, +- root->node->start); +- if (rb_node) { +- node = rb_entry(rb_node, struct mapping_node, rb_node); +- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); ++ if (rc) { ++ spin_lock(&rc->reloc_root_tree.lock); ++ rb_node = tree_search(&rc->reloc_root_tree.rb_root, ++ root->node->start); ++ 
if (rb_node) { ++ node = rb_entry(rb_node, struct mapping_node, rb_node); ++ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); ++ } ++ spin_unlock(&rc->reloc_root_tree.lock); ++ if (!node) ++ return; ++ BUG_ON((struct btrfs_root *)node->data != root); + } +- spin_unlock(&rc->reloc_root_tree.lock); +- +- if (!node) +- return; +- BUG_ON((struct btrfs_root *)node->data != root); + + spin_lock(&root->fs_info->trans_lock); + list_del_init(&root->root_list); diff --git a/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch b/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch new file mode 100644 index 00000000..25546deb --- /dev/null +++ b/queue-3.16/hfsplus-fix-null-dereference-in-hfsplus_lookup.patch @@ -0,0 +1,54 @@ +From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= + <ernesto.mnd.fernandez@gmail.com> +Date: Thu, 23 Aug 2018 17:00:25 -0700 +Subject: hfsplus: fix NULL dereference in hfsplus_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit a7ec7a4193a2eb3b5341243fc0b621c1ac9e4ec4 upstream. + +An HFS+ filesystem can be mounted read-only without having a metadata +directory, which is needed to support hardlinks. But if the catalog +data is corrupted, a directory lookup may still find dentries claiming +to be hardlinks. + +hfsplus_lookup() does check that ->hidden_dir is not NULL in such a +situation, but mistakenly does so after dereferencing it for the first +time. Reorder this check to prevent a crash. + +This happens when looking up corrupted catalog data (dentry) on a +filesystem with no metadata directory (this could only ever happen on a +read-only mount). Wen Xu sent the replication steps in detail to the +fsdevel list: https://bugzilla.kernel.org/show_bug.cgi?id=200297 + +Link: http://lkml.kernel.org/r/20180712215344.q44dyrhymm4ajkao@eaf +Signed-off-by: Ernesto A. 
Fernández <ernesto.mnd.fernandez@gmail.com> +Reported-by: Wen Xu <wen.xu@gatech.edu> +Cc: Viacheslav Dubeyko <slava@dubeyko.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +[bwh: Backported to 3.16: adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/hfsplus/dir.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/hfsplus/dir.c ++++ b/fs/hfsplus/dir.c +@@ -74,13 +74,13 @@ again: + cpu_to_be32(HFSP_HARDLINK_TYPE) && + entry.file.user_info.fdCreator == + cpu_to_be32(HFSP_HFSPLUS_CREATOR) && ++ HFSPLUS_SB(sb)->hidden_dir && + (entry.file.create_date == + HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)-> + create_date || + entry.file.create_date == + HFSPLUS_I(sb->s_root->d_inode)-> +- create_date) && +- HFSPLUS_SB(sb)->hidden_dir) { ++ create_date)) { + struct qstr str; + char name[32]; + diff --git a/queue-3.16/series b/queue-3.16/series index b7a0fc46..7129c88b 100644 --- a/queue-3.16/series +++ b/queue-3.16/series @@ -45,3 +45,9 @@ x86-speculation-protect-against-userspace-userspace-spectrersb.patch x86-paravirt-fix-spectre-v2-mitigations-for-paravirt-guests.patch cdrom-fix-info-leak-oob-read-in-cdrom_ioctl_drive_status.patch uas-replace-warn_on_once-with-lockdep_assert_held.patch +video-uvesafb-fix-integer-overflow-in-allocation.patch +btrfs-relocation-only-remove-reloc-rb_trees-if-reloc-control-has.patch +hfsplus-fix-null-dereference-in-hfsplus_lookup.patch +xfs-catch-inode-allocation-state-mismatch-corruption.patch +xfs-validate-cached-inodes-are-free-when-allocated.patch +xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch diff --git a/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch b/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch new file mode 100644 index 00000000..02715149 --- /dev/null +++ b/queue-3.16/video-uvesafb-fix-integer-overflow-in-allocation.patch @@ -0,0 +1,29 @@ +From: Kees Cook 
<keescook@chromium.org> +Date: Fri, 11 May 2018 18:24:12 +1000 +Subject: video: uvesafb: Fix integer overflow in allocation + +commit 9f645bcc566a1e9f921bdae7528a01ced5bc3713 upstream. + +cmap->len can get close to INT_MAX/2, allowing for an integer overflow in +allocation. This uses kmalloc_array() instead to catch the condition. + +Reported-by: Dr Silvio Cesare of InfoSect <silvio.cesare@gmail.com> +Fixes: 8bdb3a2d7df48 ("uvesafb: the driver core") +Signed-off-by: Kees Cook <keescook@chromium.org> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + drivers/video/fbdev/uvesafb.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/video/fbdev/uvesafb.c ++++ b/drivers/video/fbdev/uvesafb.c +@@ -1059,7 +1059,8 @@ static int uvesafb_setcmap(struct fb_cma + info->cmap.len || cmap->start < info->cmap.start) + return -EINVAL; + +- entries = kmalloc(sizeof(*entries) * cmap->len, GFP_KERNEL); ++ entries = kmalloc_array(cmap->len, sizeof(*entries), ++ GFP_KERNEL); + if (!entries) + return -ENOMEM; + diff --git a/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch b/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch new file mode 100644 index 00000000..44b7783f --- /dev/null +++ b/queue-3.16/xfs-catch-inode-allocation-state-mismatch-corruption.patch @@ -0,0 +1,182 @@ +From: Dave Chinner <dchinner@redhat.com> +Date: Fri, 23 Mar 2018 10:22:53 -0700 +Subject: xfs: catch inode allocation state mismatch corruption + +commit ee457001ed6c6f31ddad69c24c1da8f377d8472d upstream. + +We recently came across a V4 filesystem causing memory corruption +due to a newly allocated inode being setup twice and being added to +the superblock inode list twice. From code inspection, the only way +this could happen is if a newly allocated inode was not marked as +free on disk (i.e. di_mode wasn't zero). 
+ +Running the metadump on an upstream debug kernel fails during inode +allocation like so: + +XFS: Assertion failed: ip->i_d.di_nblocks == 0, file: fs/xfs/xfs_inod= +e.c, line: 838 + ------------[ cut here ]------------ +kernel BUG at fs/xfs/xfs_message.c:114! +invalid opcode: 0000 [#1] PREEMPT SMP +CPU: 11 PID: 3496 Comm: mkdir Not tainted 4.16.0-rc5-dgc #442 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/0= +1/2014 +RIP: 0010:assfail+0x28/0x30 +RSP: 0018:ffffc9000236fc80 EFLAGS: 00010202 +RAX: 00000000ffffffea RBX: 0000000000004000 RCX: 0000000000000000 +RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff8227211b +RBP: ffffc9000236fce8 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000bec R11: f000000000000000 R12: ffffc9000236fd30 +R13: ffff8805c76bab80 R14: ffff8805c77ac800 R15: ffff88083fb12e10 +FS: 00007fac8cbff040(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000= +000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fffa6783ff8 CR3: 00000005c6e2b003 CR4: 00000000000606e0 +Call Trace: + xfs_ialloc+0x383/0x570 + xfs_dir_ialloc+0x6a/0x2a0 + xfs_create+0x412/0x670 + xfs_generic_create+0x1f7/0x2c0 + ? capable_wrt_inode_uidgid+0x3f/0x50 + vfs_mkdir+0xfb/0x1b0 + SyS_mkdir+0xcf/0xf0 + do_syscall_64+0x73/0x1a0 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 + +Extracting the inode number we crashed on from an event trace and +looking at it with xfs_db: + +xfs_db> inode 184452204 +xfs_db> p +core.magic = 0x494e +core.mode = 0100644 +core.version = 2 +core.format = 2 (extents) +core.nlinkv2 = 1 +core.onlink = 0 +..... + +Confirms that it is not a free inode on disk. xfs_repair +also trips over this inode: + +..... 
+zero length extent (off = 0, fsbno = 0) in ino 184452204 +correcting nextents for inode 184452204 +bad attribute fork in inode 184452204, would clear attr fork +bad nblocks 1 for inode 184452204, would reset to 0 +bad anextents 1 for inode 184452204, would reset to 0 +imap claims in-use inode 184452204 is free, would correct imap +would have cleared inode 184452204 +..... +disconnected inode 184452204, would move to lost+found + +And so we have a situation where the directory structure and the +inobt thinks the inode is free, but the inode on disk thinks it is +still in use. Where this corruption came from is not possible to +diagnose, but we can detect it and prevent the kernel from oopsing +on lookup. The reproducer now results in: + +$ sudo mkdir /mnt/scratch/{0,1,2,3,4,5}{0,1,2,3,4,5} +mkdir: cannot create directory =E2=80=98/mnt/scratch/00=E2=80=99: File ex= +ists +mkdir: cannot create directory =E2=80=98/mnt/scratch/01=E2=80=99: File ex= +ists +mkdir: cannot create directory =E2=80=98/mnt/scratch/03=E2=80=99: Structu= +re needs cleaning +mkdir: cannot create directory =E2=80=98/mnt/scratch/04=E2=80=99: Input/o= +utput error +mkdir: cannot create directory =E2=80=98/mnt/scratch/05=E2=80=99: Input/o= +utput error +.... + +And this corruption shutdown: + +[ 54.843517] XFS (loop0): Corruption detected! Free inode 0xafe846c not= + marked free on disk +[ 54.845885] XFS (loop0): Internal error xfs_trans_cancel at line 1023 = +of file fs/xfs/xfs_trans.c. Caller xfs_create+0x425/0x670 +[ 54.848994] CPU: 10 PID: 3541 Comm: mkdir Not tainted 4.16.0-rc5-dgc #= +443 +[ 54.850753] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO= +S 1.10.2-1 04/01/2014 +[ 54.852859] Call Trace: +[ 54.853531] dump_stack+0x85/0xc5 +[ 54.854385] xfs_trans_cancel+0x197/0x1c0 +[ 54.855421] xfs_create+0x425/0x670 +[ 54.856314] xfs_generic_create+0x1f7/0x2c0 +[ 54.857390] ? 
capable_wrt_inode_uidgid+0x3f/0x50 +[ 54.858586] vfs_mkdir+0xfb/0x1b0 +[ 54.859458] SyS_mkdir+0xcf/0xf0 +[ 54.860254] do_syscall_64+0x73/0x1a0 +[ 54.861193] entry_SYSCALL_64_after_hwframe+0x42/0xb7 +[ 54.862492] RIP: 0033:0x7fb73bddf547 +[ 54.863358] RSP: 002b:00007ffdaa553338 EFLAGS: 00000246 ORIG_RAX: 0000= +000000000053 +[ 54.865133] RAX: ffffffffffffffda RBX: 00007ffdaa55449a RCX: 00007fb73= +bddf547 +[ 54.866766] RDX: 0000000000000001 RSI: 00000000000001ff RDI: 00007ffda= +a55449a +[ 54.868432] RBP: 00007ffdaa55449a R08: 00000000000001ff R09: 00005623a= +8670dd0 +[ 54.870110] R10: 00007fb73be72d5b R11: 0000000000000246 R12: 000000000= +00001ff +[ 54.871752] R13: 00007ffdaa5534b0 R14: 0000000000000000 R15: 00007ffda= +a553500 +[ 54.873429] XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1= +024 of file fs/xfs/xfs_trans.c. Return address = ffffffff814cd050 +[ 54.882790] XFS (loop0): Corruption of in-memory data detected. Shutt= +ing down filesystem +[ 54.884597] XFS (loop0): Please umount the filesystem and rectify the = +problem(s) + +Note that this crash is only possible on v4 filesystems or v5 +filesystems mounted with the ikeep mount option. For all other V5 +filesystems, this problem cannot occur because we don't read inodes +we are allocating from disk - we simply overwrite them with the new +inode information. + +Signed-Off-By: Dave Chinner <dchinner@redhat.com> +Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com> +Tested-by: Carlos Maiolino <cmaiolino@redhat.com> +Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> +Signed-off-by: Darrick J. 
Wong <darrick.wong@oracle.com> +[bwh: Backported to 3.16: + - Look up mode in XFS inode, not VFS inode + - Use positive error codes, and EIO instead of EFSCORRUPTED] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/xfs/xfs_icache.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -293,7 +293,28 @@ xfs_iget_cache_miss( + + trace_xfs_iget_miss(ip); + +- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { ++ ++ /* ++ * If we are allocating a new inode, then check what was returned is ++ * actually a free, empty inode. If we are not allocating an inode, ++ * the check we didn't find a free inode. ++ */ ++ if (flags & XFS_IGET_CREATE) { ++ if (ip->i_d.di_mode != 0) { ++ xfs_warn(mp, ++"Corruption detected! Free inode 0x%llx not marked free on disk", ++ ino); ++ error = EIO; ++ goto out_destroy; ++ } ++ if (ip->i_d.di_nblocks != 0) { ++ xfs_warn(mp, ++"Corruption detected! Free inode 0x%llx has blocks allocated!", ++ ino); ++ error = EIO; ++ goto out_destroy; ++ } ++ } else if (ip->i_d.di_mode == 0) { + error = ENOENT; + goto out_destroy; + } diff --git a/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch b/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch new file mode 100644 index 00000000..096549de --- /dev/null +++ b/queue-3.16/xfs-don-t-call-xfs_da_shrink_inode-with-null-bp.patch @@ -0,0 +1,41 @@ +From: Eric Sandeen <sandeen@sandeen.net> +Date: Fri, 8 Jun 2018 09:53:49 -0700 +Subject: xfs: don't call xfs_da_shrink_inode with NULL bp + +commit bb3d48dcf86a97dc25fe9fc2c11938e19cb4399a upstream. + +xfs_attr3_leaf_create may have errored out before instantiating a buffer, +for example if the blkno is out of range. In that case there is no work +to do to remove it, and in fact xfs_da_shrink_inode will lead to an oops +if we try. 
+ +This also seems to fix a flaw where the original error from +xfs_attr3_leaf_create gets overwritten in the cleanup case, and it +removes a pointless assignment to bp which isn't used after this. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199969 +Reported-by: Xu, Wen <wen.xu@gatech.edu> +Tested-by: Xu, Wen <wen.xu@gatech.edu> +Signed-off-by: Eric Sandeen <sandeen@redhat.com> +Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> +Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> +[bwh: Backported to 3.16: adjust filename] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/xfs/xfs_attr_leaf.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/fs/xfs/xfs_attr_leaf.c ++++ b/fs/xfs/xfs_attr_leaf.c +@@ -701,9 +701,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t + ASSERT(blkno == 0); + error = xfs_attr3_leaf_create(args, blkno, &bp); + if (error) { +- error = xfs_da_shrink_inode(args, 0, bp); +- bp = NULL; +- if (error) ++ /* xfs_attr3_leaf_create may not have instantiated a block */ ++ if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0)) + goto out; + xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ + memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ diff --git a/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch b/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch new file mode 100644 index 00000000..e758bb09 --- /dev/null +++ b/queue-3.16/xfs-validate-cached-inodes-are-free-when-allocated.patch @@ -0,0 +1,155 @@ +From: Dave Chinner <dchinner@redhat.com> +Date: Tue, 17 Apr 2018 17:17:34 -0700 +Subject: xfs: validate cached inodes are free when allocated + +commit afca6c5b2595fc44383919fba740c194b0b76aff upstream. + +A recent fuzzed filesystem image caused random dcache corruption +when the reproducer was run. This often showed up as panics in +lookup_slow() on a null inode->i_ops pointer when doing pathwalks. 
+ +BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 +.... +Call Trace: + lookup_slow+0x44/0x60 + walk_component+0x3dd/0x9f0 + link_path_walk+0x4a7/0x830 + path_lookupat+0xc1/0x470 + filename_lookup+0x129/0x270 + user_path_at_empty+0x36/0x40 + path_listxattr+0x98/0x110 + SyS_listxattr+0x13/0x20 + do_syscall_64+0xf5/0x280 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 + +but had many different failure modes including deadlocks trying to +lock the inode that was just allocated or KASAN reports of +use-after-free violations. + +The cause of the problem was a corrupt INOBT on a v4 fs where the +root inode was marked as free in the inobt record. Hence when we +allocated an inode, it chose the root inode to allocate, found it in +the cache and re-initialised it. + +We recently fixed a similar inode allocation issue caused by inobt +record corruption problem in xfs_iget_cache_miss() in commit +ee457001ed6c ("xfs: catch inode allocation state mismatch +corruption"). This change adds similar checks to the cache-hit path +to catch it, and turns the reproducer into a corruption shutdown +situation. + +Reported-by: Wen Xu <wen.xu@gatech.edu> +Signed-Off-By: Dave Chinner <dchinner@redhat.com> +Reviewed-by: Christoph Hellwig <hch@lst.de> +Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com> +Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> +[darrick: fix typos in comment] +Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> +[bwh: Backported to 3.16: + - Look up mode in XFS inode, not VFS inode + - Use positive error codes, and EIO instead of EFSCORRUPTED] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + fs/xfs/xfs_icache.c | 73 +++++++++++++++++++++++++++++---------------- + 1 file changed, 48 insertions(+), 25 deletions(-) + +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -133,6 +133,46 @@ xfs_inode_free( + } + + /* ++ * If we are allocating a new inode, then check what was returned is ++ * actually a free, empty inode. 
If we are not allocating an inode, ++ * then check we didn't find a free inode. ++ * ++ * Returns: ++ * 0 if the inode free state matches the lookup context ++ * ENOENT if the inode is free and we are not allocating ++ * EIO if there is any state mismatch at all ++ */ ++static int ++xfs_iget_check_free_state( ++ struct xfs_inode *ip, ++ int flags) ++{ ++ if (flags & XFS_IGET_CREATE) { ++ /* should be a free inode */ ++ if (ip->i_d.di_mode != 0) { ++ xfs_warn(ip->i_mount, ++"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)", ++ ip->i_ino, ip->i_d.di_mode); ++ return EIO; ++ } ++ ++ if (ip->i_d.di_nblocks != 0) { ++ xfs_warn(ip->i_mount, ++"Corruption detected! Free inode 0x%llx has blocks allocated!", ++ ip->i_ino); ++ return EIO; ++ } ++ return 0; ++ } ++ ++ /* should be an allocated inode */ ++ if (ip->i_d.di_mode == 0) ++ return ENOENT; ++ ++ return 0; ++} ++ ++/* + * Check the validity of the inode we just found it the cache + */ + static int +@@ -181,12 +221,12 @@ xfs_iget_cache_hit( + } + + /* +- * If lookup is racing with unlink return an error immediately. ++ * Check the inode free state is valid. This also detects lookup ++ * racing with unlinks. + */ +- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { +- error = ENOENT; ++ error = xfs_iget_check_free_state(ip, flags); ++ if (error) + goto out_error; +- } + + /* + * If IRECLAIMABLE is set, we've torn down the VFS inode already. +@@ -295,29 +335,12 @@ xfs_iget_cache_miss( + + + /* +- * If we are allocating a new inode, then check what was returned is +- * actually a free, empty inode. If we are not allocating an inode, +- * the check we didn't find a free inode. ++ * Check the inode free state is valid. This also detects lookup ++ * racing with unlinks. + */ +- if (flags & XFS_IGET_CREATE) { +- if (ip->i_d.di_mode != 0) { +- xfs_warn(mp, +-"Corruption detected! 
Free inode 0x%llx not marked free on disk", +- ino); +- error = EIO; +- goto out_destroy; +- } +- if (ip->i_d.di_nblocks != 0) { +- xfs_warn(mp, +-"Corruption detected! Free inode 0x%llx has blocks allocated!", +- ino); +- error = EIO; +- goto out_destroy; +- } +- } else if (ip->i_d.di_mode == 0) { +- error = ENOENT; ++ error = xfs_iget_check_free_state(ip, flags); ++ if (error) + goto out_destroy; +- } + + /* + * Preload the radix tree so we can insert safely under the |