diff options
author | Zefan Li <lizefan@huawei.com> | 2016-05-24 23:27:42 +0800 |
---|---|---|
committer | Zefan Li <lizefan@huawei.com> | 2016-05-24 23:44:47 +0800 |
commit | babb6468810a81fa15c7a1a02d21e28452f3d270 (patch) | |
tree | 101c5b3ccc406801d30bedc45dde4cbe366a0ef8 | |
parent | 6a2763a8589df1c43fa27da01ec44aa8f67194b5 (diff) | |
download | linux-3.4.y-queue-babb6468810a81fa15c7a1a02d21e28452f3d270.tar.gz |
Add commits to 3.4.y, up to 4.4
51 files changed, 3413 insertions, 0 deletions
diff --git a/patches/9p-evict_inode-should-kick-out-i_data-not-i_mapping.patch b/patches/9p-evict_inode-should-kick-out-i_data-not-i_mapping.patch new file mode 100644 index 0000000..07b2d02 --- /dev/null +++ b/patches/9p-evict_inode-should-kick-out-i_data-not-i_mapping.patch @@ -0,0 +1,52 @@ +From 4ad78628445d26e5e9487b2e8f23274ad7b0f5d3 Mon Sep 17 00:00:00 2001 +From: Al Viro <viro@zeniv.linux.org.uk> +Date: Tue, 8 Dec 2015 03:07:22 -0500 +Subject: 9p: ->evict_inode() should kick out ->i_data, not ->i_mapping + +commit 4ad78628445d26e5e9487b2e8f23274ad7b0f5d3 upstream. + +For block devices the pagecache is associated with the inode +on bdevfs, not with the aliasing ones on the mountable filesystems. +The latter have its own ->i_data empty and ->i_mapping pointing +to the (unique per major/minor) bdevfs inode. That guarantees +cache coherence between all block device inodes with the same +device number. + +Eviction of an alias inode has no business trying to evict the +pages belonging to bdevfs one; moreover, ->i_mapping is only +safe to access when the thing is opened. At the time of +->evict_inode() the victim is definitely *not* opened. We are +about to kill the address space embedded into struct inode +(inode->i_data) and that's what we need to empty of any pages. + +9p instance tries to empty inode->i_mapping instead, which is +both unsafe and bogus - if we have several device nodes with +the same device number in different places, closing one of them +should not try to empty the (shared) page cache. + +Fortunately, other instances in the tree are OK; they are +evicting from &inode->i_data instead, as 9p one should. + +Reported-by: "Suzuki K. Poulose" <Suzuki.Poulose@arm.com> +Tested-by: "Suzuki K. 
Poulose" <Suzuki.Poulose@arm.com> +Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + fs/9p/vfs_inode.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/9p/vfs_inode.c ++++ b/fs/9p/vfs_inode.c +@@ -447,9 +447,9 @@ void v9fs_evict_inode(struct inode *inod + { + struct v9fs_inode *v9inode = V9FS_I(inode); + +- truncate_inode_pages(inode->i_mapping, 0); ++ truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); +- filemap_fdatawrite(inode->i_mapping); ++ filemap_fdatawrite(&inode->i_data); + + #ifdef CONFIG_9P_FSCACHE + v9fs_cache_inode_put_cookie(inode); diff --git a/patches/ahci-fix-softreset-failed-issue-of-port-multiplier.patch b/patches/ahci-fix-softreset-failed-issue-of-port-multiplier.patch new file mode 100644 index 0000000..a9d741e --- /dev/null +++ b/patches/ahci-fix-softreset-failed-issue-of-port-multiplier.patch @@ -0,0 +1,43 @@ +From 023113d24ef9e1d2b44cb2446872b17e2b01d8b1 Mon Sep 17 00:00:00 2001 +From: Xiangliang Yu <Xiangliang.Yu@amd.com> +Date: Thu, 26 Nov 2015 20:27:02 +0800 +Subject: AHCI: Fix softreset failed issue of Port Multiplier + +commit 023113d24ef9e1d2b44cb2446872b17e2b01d8b1 upstream. + +Current code doesn't update port value of Port Multiplier(PM) when +sending FIS of softreset to device, command will fail if FBS is +enabled. + +There are two ways to fix the issue: the first is to disable FBS +before sending softreset command to PM device and the second is +to update port value of PM when sending command. + +For the first way, i can't find any related rule in AHCI Spec. The +second way can avoid disabling FBS and has better performance. 
+ +Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com> +Signed-off-by: Tejun Heo <tj@kernel.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/ata/libahci.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -1228,6 +1228,15 @@ static int ahci_exec_polled_cmd(struct a + ata_tf_to_fis(tf, pmp, is_cmd, fis); + ahci_fill_cmd_slot(pp, 0, cmd_fis_len | flags | (pmp << 12)); + ++ /* set port value for softreset of Port Multiplier */ ++ if (pp->fbs_enabled && pp->fbs_last_dev != pmp) { ++ tmp = readl(port_mmio + PORT_FBS); ++ tmp &= ~(PORT_FBS_DEV_MASK | PORT_FBS_DEC); ++ tmp |= pmp << PORT_FBS_DEV_OFFSET; ++ writel(tmp, port_mmio + PORT_FBS); ++ pp->fbs_last_dev = pmp; ++ } ++ + /* issue & wait */ + writel(1, port_mmio + PORT_CMD_ISSUE); + diff --git a/patches/alsa-rme96-fix-unexpected-volume-reset-after-rate-changes.patch b/patches/alsa-rme96-fix-unexpected-volume-reset-after-rate-changes.patch new file mode 100644 index 0000000..821c196 --- /dev/null +++ b/patches/alsa-rme96-fix-unexpected-volume-reset-after-rate-changes.patch @@ -0,0 +1,104 @@ +From a74a821624c0c75388a193337babd17a8c02c740 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai <tiwai@suse.de> +Date: Fri, 4 Dec 2015 16:44:24 +0100 +Subject: ALSA: rme96: Fix unexpected volume reset after rate changes + +commit a74a821624c0c75388a193337babd17a8c02c740 upstream. + +rme96 driver needs to reset DAC depending on the sample rate, and this +results in resetting to the max volume suddenly. It's because of the +missing call of snd_rme96_apply_dac_volume(). + +However, calling this function right after the DAC reset still may not +work, and we need some delay before this call. Since the DAC reset +and the procedure after that are performed in the spinlock, we delay +the DAC volume restore at the end after the spinlock. 
+ +Reported-and-tested-by: Sylvain LABOISNE <maeda1@free.fr> +Signed-off-by: Takashi Iwai <tiwai@suse.de> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + sound/pci/rme96.c | 41 ++++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +--- a/sound/pci/rme96.c ++++ b/sound/pci/rme96.c +@@ -704,10 +704,11 @@ snd_rme96_playback_setrate(struct rme96 + { + /* change to/from double-speed: reset the DAC (if available) */ + snd_rme96_reset_dac(rme96); ++ return 1; /* need to restore volume */ + } else { + writel(rme96->wcreg, rme96->iobase + RME96_IO_CONTROL_REGISTER); ++ return 0; + } +- return 0; + } + + static int +@@ -945,6 +946,7 @@ snd_rme96_playback_hw_params(struct snd_ + struct rme96 *rme96 = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + int err, rate, dummy; ++ bool apply_dac_volume = false; + + runtime->dma_area = (void __force *)(rme96->iobase + + RME96_IO_PLAY_BUFFER); +@@ -958,24 +960,26 @@ snd_rme96_playback_hw_params(struct snd_ + { + /* slave clock */ + if ((int)params_rate(params) != rate) { +- spin_unlock_irq(&rme96->lock); +- return -EIO; +- } +- } else if ((err = snd_rme96_playback_setrate(rme96, params_rate(params))) < 0) { +- spin_unlock_irq(&rme96->lock); +- return err; +- } +- if ((err = snd_rme96_playback_setformat(rme96, params_format(params))) < 0) { +- spin_unlock_irq(&rme96->lock); +- return err; ++ err = -EIO; ++ goto error; ++ } ++ } else { ++ err = snd_rme96_playback_setrate(rme96, params_rate(params)); ++ if (err < 0) ++ goto error; ++ apply_dac_volume = err > 0; /* need to restore volume later? 
*/ + } ++ ++ err = snd_rme96_playback_setformat(rme96, params_format(params)); ++ if (err < 0) ++ goto error; + snd_rme96_setframelog(rme96, params_channels(params), 1); + if (rme96->capture_periodsize != 0) { + if (params_period_size(params) << rme96->playback_frlog != + rme96->capture_periodsize) + { +- spin_unlock_irq(&rme96->lock); +- return -EBUSY; ++ err = -EBUSY; ++ goto error; + } + } + rme96->playback_periodsize = +@@ -986,9 +990,16 @@ snd_rme96_playback_hw_params(struct snd_ + rme96->wcreg &= ~(RME96_WCR_PRO | RME96_WCR_DOLBY | RME96_WCR_EMP); + writel(rme96->wcreg |= rme96->wcreg_spdif_stream, rme96->iobase + RME96_IO_CONTROL_REGISTER); + } ++ ++ err = 0; ++ error: + spin_unlock_irq(&rme96->lock); +- +- return 0; ++ if (apply_dac_volume) { ++ usleep_range(3000, 10000); ++ snd_rme96_apply_dac_volume(rme96); ++ } ++ ++ return err; + } + + static int diff --git a/patches/arm-8471-1-need-to-save-restore-arm-register-r11-when-it-is-corrupted.patch b/patches/arm-8471-1-need-to-save-restore-arm-register-r11-when-it-is-corrupted.patch new file mode 100644 index 0000000..c3056e9 --- /dev/null +++ b/patches/arm-8471-1-need-to-save-restore-arm-register-r11-when-it-is-corrupted.patch @@ -0,0 +1,48 @@ +From fa0708b320f6da4c1104fe56e01b7abf66fd16ad Mon Sep 17 00:00:00 2001 +From: Anson Huang <Anson.Huang@freescale.com> +Date: Mon, 7 Dec 2015 10:09:19 +0100 +Subject: ARM: 8471/1: need to save/restore arm register(r11) when it is + corrupted + +commit fa0708b320f6da4c1104fe56e01b7abf66fd16ad upstream. + +In cpu_v7_do_suspend routine, r11 is used while it is NOT +saved/restored, different compiler may have different usage +of ARM general registers, so it may cause issues during +calling cpu_v7_do_suspend. + +We meet kernel fault occurs when using GCC 4.8.3, r11 contains +valid value before calling into cpu_v7_do_suspend, but when returned +from this routine, r11 is corrupted and lead to kernel fault. 
+Doing save/restore for those corrupted registers is a must in +assemble code. + +Signed-off-by: Anson Huang <Anson.Huang@freescale.com> +Reviewed-by: Nicolas Pitre <nico@linaro.org> +Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + arch/arm/mm/proc-v7.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm/mm/proc-v7.S ++++ b/arch/arm/mm/proc-v7.S +@@ -94,7 +94,7 @@ ENDPROC(cpu_v7_dcache_clean_area) + .equ cpu_v7_suspend_size, 4 * 8 + #ifdef CONFIG_ARM_CPU_SUSPEND + ENTRY(cpu_v7_do_suspend) +- stmfd sp!, {r4 - r10, lr} ++ stmfd sp!, {r4 - r11, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID + stmia r0!, {r4 - r5} +@@ -105,7 +105,7 @@ ENTRY(cpu_v7_do_suspend) + mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control + stmia r0, {r6 - r11} +- ldmfd sp!, {r4 - r10, pc} ++ ldmfd sp!, {r4 - r11, pc} + ENDPROC(cpu_v7_do_suspend) + + ENTRY(cpu_v7_do_resume) diff --git a/patches/crypto-skcipher-copy-iv-from-desc-even-for-0-len-walks.patch b/patches/crypto-skcipher-copy-iv-from-desc-even-for-0-len-walks.patch new file mode 100644 index 0000000..1d83aec --- /dev/null +++ b/patches/crypto-skcipher-copy-iv-from-desc-even-for-0-len-walks.patch @@ -0,0 +1,60 @@ +From 70d906bc17500edfa9bdd8c8b7e59618c7911613 Mon Sep 17 00:00:00 2001 +From: "Jason A. Donenfeld" <Jason@zx2c4.com> +Date: Sun, 6 Dec 2015 02:51:37 +0100 +Subject: crypto: skcipher - Copy iv from desc even for 0-len walks + +commit 70d906bc17500edfa9bdd8c8b7e59618c7911613 upstream. + +Some ciphers actually support encrypting zero length plaintexts. For +example, many AEAD modes support this. The resulting ciphertext for +those winds up being only the authentication tag, which is a result of +the key, the iv, the additional data, and the fact that the plaintext +had zero length. 
The blkcipher constructors won't copy the IV to the +right place, however, when using a zero length input, resulting in +some significant problems when ciphers call their initialization +routines, only to find that the ->iv parameter is uninitialized. One +such example of this would be using chacha20poly1305 with a zero length +input, which then calls chacha20, which calls the key setup routine, +which eventually OOPSes due to the uninitialized ->iv member. + +Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + crypto/ablkcipher.c | 2 +- + crypto/blkcipher.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/crypto/ablkcipher.c ++++ b/crypto/ablkcipher.c +@@ -280,12 +280,12 @@ static int ablkcipher_walk_first(struct + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + ++ walk->iv = req->info; + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->iv_buffer = NULL; +- walk->iv = req->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = ablkcipher_copy_iv(walk, tfm, alignmask); + if (err) +--- a/crypto/blkcipher.c ++++ b/crypto/blkcipher.c +@@ -329,12 +329,12 @@ static int blkcipher_walk_first(struct b + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + ++ walk->iv = desc->info; + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->buffer = NULL; +- walk->iv = desc->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (err) diff --git a/patches/dm-btree-fix-bufio-buffer-leaks-in-dm_btree_del-error-path.patch b/patches/dm-btree-fix-bufio-buffer-leaks-in-dm_btree_del-error-path.patch new file mode 100644 index 0000000..b243231 --- /dev/null +++ b/patches/dm-btree-fix-bufio-buffer-leaks-in-dm_btree_del-error-path.patch @@ -0,0 +1,57 @@ +From 
ed8b45a3679eb49069b094c0711b30833f27c734 Mon Sep 17 00:00:00 2001 +From: Joe Thornber <ejt@redhat.com> +Date: Thu, 10 Dec 2015 14:37:53 +0000 +Subject: dm btree: fix bufio buffer leaks in dm_btree_del() error path + +commit ed8b45a3679eb49069b094c0711b30833f27c734 upstream. + +If dm_btree_del()'s call to push_frame() fails, e.g. due to +btree_node_validator finding invalid metadata, the dm_btree_del() error +path must unlock all frames (which have active dm-bufio buffers) that +were pushed onto the del_stack. + +Otherwise, dm_bufio_client_destroy() will BUG_ON() because dm-bufio +buffers have leaked, e.g.: + device-mapper: bufio: leaked buffer 3, hold count 1, list 0 + +Signed-off-by: Joe Thornber <ejt@redhat.com> +Signed-off-by: Mike Snitzer <snitzer@redhat.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/md/persistent-data/dm-btree.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +--- a/drivers/md/persistent-data/dm-btree.c ++++ b/drivers/md/persistent-data/dm-btree.c +@@ -230,6 +230,16 @@ static void pop_frame(struct del_stack * + dm_tm_unlock(s->tm, f->b); + } + ++static void unlock_all_frames(struct del_stack *s) ++{ ++ struct frame *f; ++ ++ while (unprocessed_frames(s)) { ++ f = s->spine + s->top--; ++ dm_tm_unlock(s->tm, f->b); ++ } ++} ++ + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) + { + int r; +@@ -285,9 +295,13 @@ int dm_btree_del(struct dm_btree_info *i + f->current_child = f->nr_children; + } + } +- + out: ++ if (r) { ++ /* cleanup all frames of del_stack */ ++ unlock_all_frames(s); ++ } + kfree(s); ++ + return r; + } + EXPORT_SYMBOL_GPL(dm_btree_del); diff --git a/patches/dm-btree-fix-leak-of-bufio-backed-block-in-btree_split_sibling-error-path.patch b/patches/dm-btree-fix-leak-of-bufio-backed-block-in-btree_split_sibling-error-path.patch new file mode 100644 index 0000000..9a85573 --- /dev/null +++ b/patches/dm-btree-fix-leak-of-bufio-backed-block-in-btree_split_sibling-error-path.patch 
@@ -0,0 +1,35 @@ +From 30ce6e1cc5a0f781d60227e9096c86e188d2c2bd Mon Sep 17 00:00:00 2001 +From: Mike Snitzer <snitzer@redhat.com> +Date: Mon, 23 Nov 2015 16:24:45 -0500 +Subject: dm btree: fix leak of bufio-backed block in btree_split_sibling error + path + +commit 30ce6e1cc5a0f781d60227e9096c86e188d2c2bd upstream. + +The block allocated at the start of btree_split_sibling() is never +released if later insert_at() fails. + +Fix this by releasing the previously allocated bufio block using +unlock_block(). + +Reported-by: Mikulas Patocka <mpatocka@redhat.com> +Signed-off-by: Mike Snitzer <snitzer@redhat.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/md/persistent-data/dm-btree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/md/persistent-data/dm-btree.c ++++ b/drivers/md/persistent-data/dm-btree.c +@@ -450,8 +450,10 @@ static int btree_split_sibling(struct sh + + r = insert_at(sizeof(__le64), pn, parent_index + 1, + le64_to_cpu(rn->keys[0]), &location); +- if (r) ++ if (r) { ++ unlock_block(s->info, right); + return r; ++ } + + if (key < le64_to_cpu(rn->keys[0])) { + unlock_block(s->info, right); diff --git a/patches/drm-ttm-fixed-a-read-write-lock-imbalance.patch b/patches/drm-ttm-fixed-a-read-write-lock-imbalance.patch new file mode 100644 index 0000000..6ef3ca1 --- /dev/null +++ b/patches/drm-ttm-fixed-a-read-write-lock-imbalance.patch @@ -0,0 +1,31 @@ +From 025af189fb44250206dd8a32fa4a682392af3301 Mon Sep 17 00:00:00 2001 +From: Thomas Hellstrom <thellstrom@vmware.com> +Date: Fri, 20 Nov 2015 11:43:50 -0800 +Subject: drm/ttm: Fixed a read/write lock imbalance + +commit 025af189fb44250206dd8a32fa4a682392af3301 upstream. + +In ttm_write_lock(), the uninterruptible path should call +__ttm_write_lock() not __ttm_read_lock(). This fixes a vmwgfx hang +on F23 start up. + +syeh: Extracted this from one of Thomas' internal patches. 
+ +Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> +Reviewed-by: Sinclair Yeh <syeh@vmware.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/gpu/drm/ttm/ttm_lock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/ttm/ttm_lock.c ++++ b/drivers/gpu/drm/ttm/ttm_lock.c +@@ -180,7 +180,7 @@ int ttm_write_lock(struct ttm_lock *lock + spin_unlock(&lock->lock); + } + } else +- wait_event(lock->queue, __ttm_read_lock(lock)); ++ wait_event(lock->queue, __ttm_write_lock(lock)); + + return ret; + } diff --git a/patches/ext4-fix-handling-of-extended-tv_sec.patch b/patches/ext4-fix-handling-of-extended-tv_sec.patch new file mode 100644 index 0000000..07074f3 --- /dev/null +++ b/patches/ext4-fix-handling-of-extended-tv_sec.patch @@ -0,0 +1,108 @@ +From a4dad1ae24f850410c4e60f22823cba1289b8d52 Mon Sep 17 00:00:00 2001 +From: David Turner <novalis@novalis.org> +Date: Tue, 24 Nov 2015 14:34:37 -0500 +Subject: ext4: Fix handling of extended tv_sec + +commit a4dad1ae24f850410c4e60f22823cba1289b8d52 upstream. + +In ext4, the bottom two bits of {a,c,m}time_extra are used to extend +the {a,c,m}time fields, deferring the year 2038 problem to the year +2446. + +When decoding these extended fields, for times whose bottom 32 bits +would represent a negative number, sign extension causes the 64-bit +extended timestamp to be negative as well, which is not what's +intended. This patch corrects that issue, so that the only negative +{a,c,m}times are those between 1901 and 1970 (as per 32-bit signed +timestamps). + +Some older kernels might have written pre-1970 dates with 1,1 in the +extra bits. This patch treats those incorrectly-encoded dates as +pre-1970, instead of post-2311, until kernel 4.20 is released. +Hopefully by then e2fsck will have fixed up the bad data. + +Also add a comment explaining the encoding of ext4's extra {a,c,m}time +bits. 
+ +Signed-off-by: David Turner <novalis@novalis.org> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Reported-by: Mark Harris <mh8928@yahoo.com> +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=23732 +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + fs/ext4/ext4.h | 51 ++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 44 insertions(+), 7 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -26,6 +26,7 @@ + #include <linux/seqlock.h> + #include <linux/mutex.h> + #include <linux/timer.h> ++#include <linux/version.h> + #include <linux/wait.h> + #include <linux/blockgroup_lock.h> + #include <linux/percpu_counter.h> +@@ -704,19 +705,55 @@ struct move_extent { + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + ++/* ++ * We use an encoding that preserves the times for extra epoch "00": ++ * ++ * extra msb of adjust for signed ++ * epoch 32-bit 32-bit tv_sec to ++ * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range ++ * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 ++ * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 ++ * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 ++ * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 ++ * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 ++ * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 ++ * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 ++ * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 ++ * ++ * Note that previous versions of the kernel on 64-bit systems would ++ * incorrectly use extra epoch bits 1,1 for dates between 1901 and ++ * 1970. e2fsck will correct this, assuming that it is run on the ++ * affected filesystem before 2242. ++ */ ++ + static inline __le32 ext4_encode_extra_time(struct timespec *time) + { +- return cpu_to_le32((sizeof(time->tv_sec) > 4 ? 
+- (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | +- ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); ++ u32 extra = sizeof(time->tv_sec) > 4 ? ++ ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; ++ return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); + } + + static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) + { +- if (sizeof(time->tv_sec) > 4) +- time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) +- << 32; +- time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; ++ if (unlikely(sizeof(time->tv_sec) > 4 && ++ (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0) ++ /* Handle legacy encoding of pre-1970 dates with epoch ++ * bits 1,1. We assume that by kernel version 4.20, ++ * everyone will have run fsck over the affected ++ * filesystems to correct the problem. (This ++ * backwards compatibility may be removed before this ++ * time, at the discretion of the ext4 developers.) 
++ */ ++ u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; ++ if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) ++ extra_bits = 0; ++ time->tv_sec += extra_bits << 32; ++#else ++ time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; ++#endif ++ } ++ time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + } + + #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ diff --git a/patches/ftrace-scripts-fix-incorrect-use-of-sprintf-in-recordmcount.patch b/patches/ftrace-scripts-fix-incorrect-use-of-sprintf-in-recordmcount.patch new file mode 100644 index 0000000..bf3fc3d --- /dev/null +++ b/patches/ftrace-scripts-fix-incorrect-use-of-sprintf-in-recordmcount.patch @@ -0,0 +1,37 @@ +From 713a3e4de707fab49d5aa4bceb77db1058572a7b Mon Sep 17 00:00:00 2001 +From: Colin Ian King <colin.king@canonical.com> +Date: Wed, 30 Dec 2015 23:06:41 +0000 +Subject: ftrace/scripts: Fix incorrect use of sprintf in recordmcount + +commit 713a3e4de707fab49d5aa4bceb77db1058572a7b upstream. 
+ +Fix build warning: + +scripts/recordmcount.c:589:4: warning: format not a string +literal and no format arguments [-Wformat-security] + sprintf("%s: failed\n", file); + +Fixes: a50bd43935586 ("ftrace/scripts: Have recordmcount copy the object file") +Link: http://lkml.kernel.org/r/1451516801-16951-1-git-send-email-colin.king@canonical.com + +Cc: Li Bin <huawei.libin@huawei.com> +Cc: Russell King <rmk+kernel@arm.linux.org.uk> +Cc: Will Deacon <will.deacon@arm.com> +Signed-off-by: Colin Ian King <colin.king@canonical.com> +Signed-off-by: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + scripts/recordmcount.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/scripts/recordmcount.c ++++ b/scripts/recordmcount.c +@@ -546,7 +546,7 @@ main(int argc, char *argv[]) + do_file(file); + break; + case SJ_FAIL: /* error in do_file or below */ +- sprintf("%s: failed\n", file); ++ fprintf(stderr, "%s: failed\n", file); + ++n_error; + break; + case SJ_SUCCEED: /* premature success */ diff --git a/patches/ftrace-scripts-have-recordmcount-copy-the-object-file.patch b/patches/ftrace-scripts-have-recordmcount-copy-the-object-file.patch new file mode 100644 index 0000000..44f6bb2 --- /dev/null +++ b/patches/ftrace-scripts-have-recordmcount-copy-the-object-file.patch @@ -0,0 +1,255 @@ +From a50bd43935586420fb75f4558369eb08566fac5e Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org> +Date: Tue, 15 Dec 2015 16:06:10 -0500 +Subject: ftrace/scripts: Have recordmcount copy the object file + +commit a50bd43935586420fb75f4558369eb08566fac5e upstream. + +Russell King found that he had weird side effects when compiling the kernel +with hard linked ccache. The reason was that recordmcount modified the +kernel in place via mmap, and when a file gets modified twice by +recordmcount, it will complain about it. 
To fix this issue, Russell wrote a +patch that checked if the file was hard linked more than once and would +unlink it if it was. + +Linus Torvalds was not happy with the fact that recordmcount does this in +place modification. Instead of doing the unlink only if the file has two or +more hard links, it does the unlink all the time. In other words, it always +does a copy if it changed something. That is, it does the write out if a +change was made. + +Signed-off-by: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + scripts/recordmcount.c | 145 +++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 110 insertions(+), 35 deletions(-) + +--- a/scripts/recordmcount.c ++++ b/scripts/recordmcount.c +@@ -35,12 +35,17 @@ + + static int fd_map; /* File descriptor for file being modified. */ + static int mmap_failed; /* Boolean flag. */ +-static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ + static char gpfx; /* prefix for global symbol name (sometimes '_') */ + static struct stat sb; /* Remember .st_size, etc. 
*/ + static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ + static const char *altmcount; /* alternate mcount symbol name */ + static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */ ++static void *file_map; /* pointer of the mapped file */ ++static void *file_end; /* pointer to the end of the mapped file */ ++static int file_updated; /* flag to state file was changed */ ++static void *file_ptr; /* current file pointer location */ ++static void *file_append; /* added to the end of the file */ ++static size_t file_append_size; /* how much is added to end of file */ + + /* setjmp() return values */ + enum { +@@ -54,10 +59,14 @@ static void + cleanup(void) + { + if (!mmap_failed) +- munmap(ehdr_curr, sb.st_size); ++ munmap(file_map, sb.st_size); + else +- free(ehdr_curr); +- close(fd_map); ++ free(file_map); ++ file_map = NULL; ++ free(file_append); ++ file_append = NULL; ++ file_append_size = 0; ++ file_updated = 0; + } + + static void __attribute__((noreturn)) +@@ -79,12 +88,22 @@ succeed_file(void) + static off_t + ulseek(int const fd, off_t const offset, int const whence) + { +- off_t const w = lseek(fd, offset, whence); +- if (w == (off_t)-1) { +- perror("lseek"); ++ switch (whence) { ++ case SEEK_SET: ++ file_ptr = file_map + offset; ++ break; ++ case SEEK_CUR: ++ file_ptr += offset; ++ break; ++ case SEEK_END: ++ file_ptr = file_map + (sb.st_size - offset); ++ break; ++ } ++ if (file_ptr < file_map) { ++ fprintf(stderr, "lseek: seek before file\n"); + fail_file(); + } +- return w; ++ return file_ptr - file_map; + } + + static size_t +@@ -101,12 +120,38 @@ uread(int const fd, void *const buf, siz + static size_t + uwrite(int const fd, void const *const buf, size_t const count) + { +- size_t const n = write(fd, buf, count); +- if (n != count) { +- perror("write"); +- fail_file(); ++ size_t cnt = count; ++ off_t idx = 0; ++ ++ file_updated = 1; ++ ++ if (file_ptr + count >= file_end) { ++ off_t aoffset = (file_ptr + 
count) - file_end; ++ ++ if (aoffset > file_append_size) { ++ file_append = realloc(file_append, aoffset); ++ file_append_size = aoffset; ++ } ++ if (!file_append) { ++ perror("write"); ++ fail_file(); ++ } ++ if (file_ptr < file_end) { ++ cnt = file_end - file_ptr; ++ } else { ++ cnt = 0; ++ idx = aoffset - count; ++ } + } +- return n; ++ ++ if (cnt) ++ memcpy(file_ptr, buf, cnt); ++ ++ if (cnt < count) ++ memcpy(file_append + idx, buf + cnt, count - cnt); ++ ++ file_ptr += count; ++ return count; + } + + static void * +@@ -163,9 +208,7 @@ static int make_nop_x86(void *map, size_ + */ + static void *mmap_file(char const *fname) + { +- void *addr; +- +- fd_map = open(fname, O_RDWR); ++ fd_map = open(fname, O_RDONLY); + if (fd_map < 0 || fstat(fd_map, &sb) < 0) { + perror(fname); + fail_file(); +@@ -174,29 +217,58 @@ static void *mmap_file(char const *fname + fprintf(stderr, "not a regular file: %s\n", fname); + fail_file(); + } +- addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, +- fd_map, 0); ++ file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, ++ fd_map, 0); + mmap_failed = 0; +- if (addr == MAP_FAILED) { ++ if (file_map == MAP_FAILED) { + mmap_failed = 1; +- addr = umalloc(sb.st_size); +- uread(fd_map, addr, sb.st_size); ++ file_map = umalloc(sb.st_size); ++ uread(fd_map, file_map, sb.st_size); + } +- if (sb.st_nlink != 1) { +- /* file is hard-linked, break the hard link */ +- close(fd_map); +- if (unlink(fname) < 0) { +- perror(fname); +- fail_file(); +- } +- fd_map = open(fname, O_RDWR | O_CREAT, sb.st_mode); +- if (fd_map < 0) { +- perror(fname); ++ close(fd_map); ++ ++ file_end = file_map + sb.st_size; ++ ++ return file_map; ++} ++ ++static void write_file(const char *fname) ++{ ++ char tmp_file[strlen(fname) + 4]; ++ size_t n; ++ ++ if (!file_updated) ++ return; ++ ++ sprintf(tmp_file, "%s.rc", fname); ++ ++ /* ++ * After reading the entire file into memory, delete it ++ * and write it back, to prevent weird side effects of 
modifying ++ * an object file in place. ++ */ ++ fd_map = open(tmp_file, O_WRONLY | O_TRUNC | O_CREAT, sb.st_mode); ++ if (fd_map < 0) { ++ perror(fname); ++ fail_file(); ++ } ++ n = write(fd_map, file_map, sb.st_size); ++ if (n != sb.st_size) { ++ perror("write"); ++ fail_file(); ++ } ++ if (file_append_size) { ++ n = write(fd_map, file_append, file_append_size); ++ if (n != file_append_size) { ++ perror("write"); + fail_file(); + } +- uwrite(fd_map, addr, sb.st_size); + } +- return addr; ++ close(fd_map); ++ if (rename(tmp_file, fname) < 0) { ++ perror(fname); ++ fail_file(); ++ } + } + + /* w8rev, w8nat, ...: Handle endianness. */ +@@ -303,7 +375,6 @@ do_file(char const *const fname) + Elf32_Ehdr *const ehdr = mmap_file(fname); + unsigned int reltype = 0; + +- ehdr_curr = ehdr; + w = w4nat; + w2 = w2nat; + w8 = w8nat; +@@ -415,6 +486,7 @@ do_file(char const *const fname) + } + } /* end switch */ + ++ write_file(fname); + cleanup(); + } + +@@ -467,11 +539,14 @@ main(int argc, char *argv[]) + case SJ_SETJMP: /* normal sequence */ + /* Avoid problems if early cleanup() */ + fd_map = -1; +- ehdr_curr = NULL; + mmap_failed = 1; ++ file_map = NULL; ++ file_ptr = NULL; ++ file_updated = 0; + do_file(file); + break; + case SJ_FAIL: /* error in do_file or below */ ++ sprintf("%s: failed\n", file); + ++n_error; + break; + case SJ_SUCCEED: /* premature success */ diff --git a/patches/fuse-break-infinite-loop-in-fuse_fill_write_pages.patch b/patches/fuse-break-infinite-loop-in-fuse_fill_write_pages.patch new file mode 100644 index 0000000..b922870 --- /dev/null +++ b/patches/fuse-break-infinite-loop-in-fuse_fill_write_pages.patch @@ -0,0 +1,58 @@ +From 3ca8138f014a913f98e6ef40e939868e1e9ea876 Mon Sep 17 00:00:00 2001 +From: Roman Gushchin <klamm@yandex-team.ru> +Date: Mon, 12 Oct 2015 16:33:44 +0300 +Subject: fuse: break infinite loop in fuse_fill_write_pages() + +commit 3ca8138f014a913f98e6ef40e939868e1e9ea876 upstream. + +I got a report about unkillable task eating CPU. 
Further +investigation shows, that the problem is in the fuse_fill_write_pages() +function. If iov's first segment has zero length, we get an infinite +loop, because we never reach iov_iter_advance() call. + +Fix this by calling iov_iter_advance() before repeating an attempt to +copy data from userspace. + +A similar problem is described in 124d3b7041f ("fix writev regression: +pan hanging unkillable and un-straceable"). If zero-length segment +is followed by segment with invalid address, +iov_iter_fault_in_readable() checks only first segment (zero-length), +iov_iter_copy_from_user_atomic() skips it, fails at second and +returns zero -> goto again without skipping zero-length segment. + +Patch calls iov_iter_advance() before goto again: we'll skip zero-length +segment at second iteration and iov_iter_fault_in_readable() will detect +invalid address. + +Special thanks to Konstantin Khlebnikov, who helped a lot with the commit +description. + +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Maxim Patlasov <mpatlasov@parallels.com> +Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> +Signed-off-by: Roman Gushchin <klamm@yandex-team.ru> +Signed-off-by: Miklos Szeredi <miklos@szeredi.hu> +Fixes: ea9b9907b82a ("fuse: implement perform_write") +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + fs/fuse/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -846,6 +846,7 @@ static ssize_t fuse_fill_write_pages(str + + mark_page_accessed(page); + ++ iov_iter_advance(ii, tmp); + if (!tmp) { + unlock_page(page); + page_cache_release(page); +@@ -857,7 +858,6 @@ static ssize_t fuse_fill_write_pages(str + req->pages[req->num_pages] = page; + req->num_pages++; + +- iov_iter_advance(ii, tmp); + count += tmp; + pos += tmp; + offset += tmp; diff --git a/patches/genirq-prevent-chip-buslock-deadlock.patch b/patches/genirq-prevent-chip-buslock-deadlock.patch new file mode 100644 index 0000000..44729e3 --- /dev/null +++ 
b/patches/genirq-prevent-chip-buslock-deadlock.patch @@ -0,0 +1,77 @@ +From abc7e40c81d113ef4bacb556f0a77ca63ac81d85 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun, 13 Dec 2015 18:12:30 +0100 +Subject: genirq: Prevent chip buslock deadlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit abc7e40c81d113ef4bacb556f0a77ca63ac81d85 upstream. + +If an interrupt chip utilizes chip->buslock then free_irq() can +deadlock in the following way: + +CPU0 CPU1 + interrupt(X) (Shared or spurious) +free_irq(X) interrupt_thread(X) +chip_bus_lock(X) + irq_finalize_oneshot(X) + chip_bus_lock(X) +synchronize_irq(X) + +synchronize_irq() waits for the interrupt thread to complete, +i.e. forever. + +Solution is simple: Drop chip_bus_lock() before calling +synchronize_irq() as we do with the irq_desc lock. There is nothing to +be protected after the point where irq_desc lock has been released. + +This adds chip_bus_lock/unlock() to the remove_irq() code path, but +that's actually correct in the case where remove_irq() is called on +such an interrupt. The current users of remove_irq() are not affected +as none of those interrupts is on a chip which requires buslock. 
+ +Reported-by: Fredrik Markström <fredrik.markstrom@gmail.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + kernel/irq/manage.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -1181,6 +1181,7 @@ static struct irqaction *__free_irq(unsi + if (!desc) + return NULL; + ++ chip_bus_lock(desc); + raw_spin_lock_irqsave(&desc->lock, flags); + + /* +@@ -1194,7 +1195,7 @@ static struct irqaction *__free_irq(unsi + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + raw_spin_unlock_irqrestore(&desc->lock, flags); +- ++ chip_bus_sync_unlock(desc); + return NULL; + } + +@@ -1223,6 +1224,7 @@ static struct irqaction *__free_irq(unsi + #endif + + raw_spin_unlock_irqrestore(&desc->lock, flags); ++ chip_bus_sync_unlock(desc); + + unregister_handler_proc(irq, action); + +@@ -1296,9 +1298,7 @@ void free_irq(unsigned int irq, void *de + desc->affinity_notify = NULL; + #endif + +- chip_bus_lock(desc); + kfree(__free_irq(irq, dev_id)); +- chip_bus_sync_unlock(desc); + } + EXPORT_SYMBOL(free_irq); + diff --git a/patches/ipv6-addrlabel-fix-ip6addrlbl_get.patch b/patches/ipv6-addrlabel-fix-ip6addrlbl_get.patch new file mode 100644 index 0000000..a9d061f --- /dev/null +++ b/patches/ipv6-addrlabel-fix-ip6addrlbl_get.patch @@ -0,0 +1,34 @@ +From e459dfeeb64008b2d23bdf600f03b3605dbb8152 Mon Sep 17 00:00:00 2001 +From: Andrey Ryabinin <aryabinin@virtuozzo.com> +Date: Mon, 21 Dec 2015 12:54:45 +0300 +Subject: ipv6/addrlabel: fix ip6addrlbl_get() + +commit e459dfeeb64008b2d23bdf600f03b3605dbb8152 upstream. + +ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeded, +ip6addrlbl_get() will exit with '-ESRCH'. If ip6addrlbl_hold() failed, +ip6addrlbl_get() will use about to be free ip6addrlbl_entry pointer. + +Fix this by inverting ip6addrlbl_hold() check. 
+ +Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") +Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> +Reviewed-by: Cong Wang <cwang@twopensource.com> +Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + net/ipv6/addrlabel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/addrlabel.c ++++ b/net/ipv6/addrlabel.c +@@ -558,7 +558,7 @@ static int ip6addrlbl_get(struct sk_buff + + rcu_read_lock(); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); +- if (p && ip6addrlbl_hold(p)) ++ if (p && !ip6addrlbl_hold(p)) + p = NULL; + lseq = ip6addrlbl_table.seq; + rcu_read_unlock(); diff --git a/patches/jbd2-fix-unreclaimed-pages-after-truncate-in-data-journal-mode.patch b/patches/jbd2-fix-unreclaimed-pages-after-truncate-in-data-journal-mode.patch new file mode 100644 index 0000000..bcb089a --- /dev/null +++ b/patches/jbd2-fix-unreclaimed-pages-after-truncate-in-data-journal-mode.patch @@ -0,0 +1,59 @@ +From bc23f0c8d7ccd8d924c4e70ce311288cb3e61ea8 Mon Sep 17 00:00:00 2001 +From: Jan Kara <jack@suse.cz> +Date: Tue, 24 Nov 2015 15:34:35 -0500 +Subject: jbd2: Fix unreclaimed pages after truncate in data=journal mode + +commit bc23f0c8d7ccd8d924c4e70ce311288cb3e61ea8 upstream. + +Ted and Namjae have reported that truncated pages don't get timely +reclaimed after being truncated in data=journal mode. The following test +triggers the issue easily: + +for (i = 0; i < 1000; i++) { + pwrite(fd, buf, 1024*1024, 0); + fsync(fd); + fsync(fd); + ftruncate(fd, 0); +} + +The reason is that journal_unmap_buffer() finds that truncated buffers +are not journalled (jh->b_transaction == NULL), they are part of +checkpoint list of a transaction (jh->b_cp_transaction != NULL) and have +been already written out (!buffer_dirty(bh)). 
We clean such buffers but +we leave them in the checkpoint list. Since checkpoint transaction holds +a reference to the journal head, these buffers cannot be released until +the checkpoint transaction is cleaned up. And at that point we don't +call release_buffer_page() anymore so pages detached from mapping are +lingering in the system waiting for reclaim to find them and free them. + +Fix the problem by removing buffers from transaction checkpoint lists +when journal_unmap_buffer() finds out they don't have to be there +anymore. + +Reported-and-tested-by: Namjae Jeon <namjae.jeon@samsung.com> +Fixes: de1b794130b130e77ffa975bb58cb843744f9ae5 +Signed-off-by: Jan Kara <jack@suse.cz> +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + fs/jbd2/transaction.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1904,6 +1904,7 @@ static int journal_unmap_buffer(journal_ + + if (!buffer_dirty(bh)) { + /* bdflush has written it. We can drop it now */ ++ __jbd2_journal_remove_checkpoint(jh); + goto zap_buffer; + } + +@@ -1941,6 +1942,7 @@ static int journal_unmap_buffer(journal_ + /* The orphan record's transaction has + * committed. We can cleanse this buffer */ + clear_buffer_jbddirty(bh); ++ __jbd2_journal_remove_checkpoint(jh); + goto zap_buffer; + } + } diff --git a/patches/keys-fix-race-between-read-and-revoke.patch b/patches/keys-fix-race-between-read-and-revoke.patch new file mode 100644 index 0000000..6157f75 --- /dev/null +++ b/patches/keys-fix-race-between-read-and-revoke.patch @@ -0,0 +1,112 @@ +From b4a1b4f5047e4f54e194681125c74c0aa64d637d Mon Sep 17 00:00:00 2001 +From: David Howells <dhowells@redhat.com> +Date: Fri, 18 Dec 2015 01:34:26 +0000 +Subject: KEYS: Fix race between read and revoke + +commit b4a1b4f5047e4f54e194681125c74c0aa64d637d upstream. + +This fixes CVE-2015-7550. + +There's a race between keyctl_read() and keyctl_revoke(). 
If the revoke +happens between keyctl_read() checking the validity of a key and the key's +semaphore being taken, then the key type read method will see a revoked key. + +This causes a problem for the user-defined key type because it assumes in +its read method that there will always be a payload in a non-revoked key +and doesn't check for a NULL pointer. + +Fix this by making keyctl_read() check the validity of a key after taking +semaphore instead of before. + +I think the bug was introduced with the original keyrings code. + +This was discovered by a multithreaded test program generated by syzkaller +(http://github.com/google/syzkaller). Here's a cleaned up version: + + #include <sys/types.h> + #include <keyutils.h> + #include <pthread.h> + void *thr0(void *arg) + { + key_serial_t key = (unsigned long)arg; + keyctl_revoke(key); + return 0; + } + void *thr1(void *arg) + { + key_serial_t key = (unsigned long)arg; + char buffer[16]; + keyctl_read(key, buffer, 16); + return 0; + } + int main() + { + key_serial_t key = add_key("user", "%", "foo", 3, KEY_SPEC_USER_KEYRING); + pthread_t th[5]; + pthread_create(&th[0], 0, thr0, (void *)(unsigned long)key); + pthread_create(&th[1], 0, thr1, (void *)(unsigned long)key); + pthread_create(&th[2], 0, thr0, (void *)(unsigned long)key); + pthread_create(&th[3], 0, thr1, (void *)(unsigned long)key); + pthread_join(th[0], 0); + pthread_join(th[1], 0); + pthread_join(th[2], 0); + pthread_join(th[3], 0); + return 0; + } + +Build as: + + cc -o keyctl-race keyctl-race.c -lkeyutils -lpthread + +Run as: + + while keyctl-race; do :; done + +as it may need several iterations to crash the kernel. The crash can be +summarised as: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 + IP: [<ffffffff81279b08>] user_read+0x56/0xa3 + ... 
+ Call Trace: + [<ffffffff81276aa9>] keyctl_read_key+0xb6/0xd7 + [<ffffffff81277815>] SyS_keyctl+0x83/0xe0 + [<ffffffff815dbb97>] entry_SYSCALL_64_fastpath+0x12/0x6f + +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: David Howells <dhowells@redhat.com> +Tested-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: James Morris <james.l.morris@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + security/keys/keyctl.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -702,16 +702,16 @@ long keyctl_read_key(key_serial_t keyid, + + /* the key is probably readable - now try to read it */ + can_read_key: +- ret = key_validate(key); +- if (ret == 0) { +- ret = -EOPNOTSUPP; +- if (key->type->read) { +- /* read the data with the semaphore held (since we +- * might sleep) */ +- down_read(&key->sem); ++ ret = -EOPNOTSUPP; ++ if (key->type->read) { ++ /* Read the data with the semaphore held (since we might sleep) ++ * to protect against the key being updated or revoked. ++ */ ++ down_read(&key->sem); ++ ret = key_validate(key); ++ if (ret == 0) + ret = key->type->read(key, buffer, buflen); +- up_read(&key->sem); +- } ++ up_read(&key->sem); + } + + error2: diff --git a/patches/misdn-fix-a-loop-count.patch b/patches/misdn-fix-a-loop-count.patch new file mode 100644 index 0000000..af3f6b0 --- /dev/null +++ b/patches/misdn-fix-a-loop-count.patch @@ -0,0 +1,59 @@ +From 40d24c4d8a7430aa4dfd7a665fa3faf3b05b673f Mon Sep 17 00:00:00 2001 +From: Dan Carpenter <dan.carpenter@oracle.com> +Date: Tue, 15 Dec 2015 13:07:52 +0300 +Subject: mISDN: fix a loop count + +commit 40d24c4d8a7430aa4dfd7a665fa3faf3b05b673f upstream. + +There are two issue here. +1) cnt starts as maxloop + 1 so all these loops iterate one more time + than intended. +2) At the end of the loop we test for "if (maxloop && !cnt)" but for + the first two loops, we end with cnt equal to -1. 
Changing this to + a pre-op means we end with cnt set to 0. + +Fixes: cae86d4a4e56 ('mISDN: Add driver for Infineon ISDN chipset family') +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/isdn/hardware/mISDN/mISDNipac.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/isdn/hardware/mISDN/mISDNipac.c ++++ b/drivers/isdn/hardware/mISDN/mISDNipac.c +@@ -1156,7 +1156,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int + + if (ipac->type & IPAC_TYPE_IPACX) { + ista = ReadIPAC(ipac, ISACX_ISTA); +- while (ista && cnt--) { ++ while (ista && --cnt) { + pr_debug("%s: ISTA %02x\n", ipac->name, ista); + if (ista & IPACX__ICA) + ipac_irq(&ipac->hscx[0], ista); +@@ -1168,7 +1168,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int + } + } else if (ipac->type & IPAC_TYPE_IPAC) { + ista = ReadIPAC(ipac, IPAC_ISTA); +- while (ista && cnt--) { ++ while (ista && --cnt) { + pr_debug("%s: ISTA %02x\n", ipac->name, ista); + if (ista & (IPAC__ICD | IPAC__EXD)) { + istad = ReadISAC(isac, ISAC_ISTA); +@@ -1186,7 +1186,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int + ista = ReadIPAC(ipac, IPAC_ISTA); + } + } else if (ipac->type & IPAC_TYPE_HSCX) { +- while (cnt) { ++ while (--cnt) { + ista = ReadIPAC(ipac, IPAC_ISTAB + ipac->hscx[1].off); + pr_debug("%s: B2 ISTA %02x\n", ipac->name, ista); + if (ista) +@@ -1197,7 +1197,6 @@ mISDNipac_irq(struct ipac_hw *ipac, int + mISDNisac_irq(isac, istad); + if (0 == (ista | istad)) + break; +- cnt--; + } + } + if (cnt > maxloop) /* only for ISAC/HSCX without PCI IRQ test */ diff --git a/patches/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch b/patches/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch new file mode 100644 index 0000000..7d68207 --- /dev/null +++ b/patches/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch @@ -0,0 +1,59 @@ +From 0d777df5d8953293be090d9ab5a355db893e8357 Mon Sep 17 
00:00:00 2001 +From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> +Date: Fri, 11 Dec 2015 13:40:49 -0800 +Subject: mm: hugetlb: call huge_pte_alloc() only if ptep is null + +commit 0d777df5d8953293be090d9ab5a355db893e8357 upstream. + +Currently at the beginning of hugetlb_fault(), we call huge_pte_offset() +and check whether the obtained *ptep is a migration/hwpoison entry or +not. And if not, then we get to call huge_pte_alloc(). This is racy +because the *ptep could turn into migration/hwpoison entry after the +huge_pte_offset() check. This race results in BUG_ON in +huge_pte_alloc(). + +We don't have to call huge_pte_alloc() when the huge_pte_offset() +returns non-NULL, so let's fix this bug with moving the code into else +block. + +Note that the *ptep could turn into a migration/hwpoison entry after +this block, but that's not a problem because we have another +!pte_present check later (we never go into hugetlb_no_page() in that +case.) + +Fixes: 290408d4a250 ("hugetlb: hugepage migration core") +Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> +Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com> +Acked-by: David Rientjes <rientjes@google.com> +Cc: Hugh Dickins <hughd@google.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> +Cc: Mike Kravetz <mike.kravetz@oracle.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + mm/hugetlb.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -2835,12 +2835,12 @@ int hugetlb_fault(struct mm_struct *mm, + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) + return VM_FAULT_HWPOISON_LARGE | + VM_FAULT_SET_HINDEX(h - hstates); ++ } else { ++ ptep = huge_pte_alloc(mm, address, huge_page_size(h)); ++ if (!ptep) ++ return 
VM_FAULT_OOM; + } + +- ptep = huge_pte_alloc(mm, address, huge_page_size(h)); +- if (!ptep) +- return VM_FAULT_OOM; +- + /* + * Serialize hugepage allocation and instantiation, so that we don't + * get spurious allocation failures if two CPUs race to instantiate diff --git a/patches/mm-memory_hotplug.c-check-for-missing-sections-in-test_pages_in_a_zone.patch b/patches/mm-memory_hotplug.c-check-for-missing-sections-in-test_pages_in_a_zone.patch new file mode 100644 index 0000000..f606307 --- /dev/null +++ b/patches/mm-memory_hotplug.c-check-for-missing-sections-in-test_pages_in_a_zone.patch @@ -0,0 +1,81 @@ +From 5f0f2887f4de9508dcf438deab28f1de8070c271 Mon Sep 17 00:00:00 2001 +From: Andrew Banman <abanman@sgi.com> +Date: Tue, 29 Dec 2015 14:54:25 -0800 +Subject: mm/memory_hotplug.c: check for missing sections in + test_pages_in_a_zone() + +commit 5f0f2887f4de9508dcf438deab28f1de8070c271 upstream. + +test_pages_in_a_zone() does not account for the possibility of missing +sections in the given pfn range. pfn_valid_within always returns 1 when +CONFIG_HOLES_IN_ZONE is not set, allowing invalid pfns from missing +sections to pass the test, leading to a kernel oops. + +Wrap an additional pfn loop with PAGES_PER_SECTION granularity to check +for missing sections before proceeding into the zone-check code. + +This also prevents a crash from offlining memory devices with missing +sections. Despite this, it may be a good idea to keep the related patch +'[PATCH 3/3] drivers: memory: prohibit offlining of memory blocks with +missing sections' because missing sections in a memory block may lead to +other problems not covered by the scope of this fix. 
+ +Signed-off-by: Andrew Banman <abanman@sgi.com> +Acked-by: Alex Thorlton <athorlton@sgi.com> +Cc: Russ Anderson <rja@sgi.com> +Cc: Alex Thorlton <athorlton@sgi.com> +Cc: Yinghai Lu <yinghai@kernel.org> +Cc: Greg KH <greg@kroah.com> +Cc: Seth Jennings <sjennings@variantweb.net> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + mm/memory_hotplug.c | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -716,23 +716,30 @@ int is_mem_section_removable(unsigned lo + */ + static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) + { +- unsigned long pfn; ++ unsigned long pfn, sec_end_pfn; + struct zone *zone = NULL; + struct page *page; + int i; +- for (pfn = start_pfn; ++ for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn); + pfn < end_pfn; +- pfn += MAX_ORDER_NR_PAGES) { +- i = 0; +- /* This is just a CONFIG_HOLES_IN_ZONE check.*/ +- while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) +- i++; +- if (i == MAX_ORDER_NR_PAGES) ++ pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) { ++ /* Make sure the memory section is present first */ ++ if (!present_section_nr(pfn_to_section_nr(pfn))) + continue; +- page = pfn_to_page(pfn + i); +- if (zone && page_zone(page) != zone) +- return 0; +- zone = page_zone(page); ++ for (; pfn < sec_end_pfn && pfn < end_pfn; ++ pfn += MAX_ORDER_NR_PAGES) { ++ i = 0; ++ /* This is just a CONFIG_HOLES_IN_ZONE check.*/ ++ while ((i < MAX_ORDER_NR_PAGES) && ++ !pfn_valid_within(pfn + i)) ++ i++; ++ if (i == MAX_ORDER_NR_PAGES) ++ continue; ++ page = pfn_to_page(pfn + i); ++ if (zone && page_zone(page) != zone) ++ return 0; ++ zone = page_zone(page); ++ } + } + return 1; + } diff --git a/patches/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch 
b/patches/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch new file mode 100644 index 0000000..d6c4561 --- /dev/null +++ b/patches/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch @@ -0,0 +1,120 @@ +From 373ccbe5927034b55bdc80b0f8b54d6e13fe8d12 Mon Sep 17 00:00:00 2001 +From: Michal Hocko <mhocko@suse.com> +Date: Fri, 11 Dec 2015 13:40:32 -0800 +Subject: mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't + make any progress + +commit 373ccbe5927034b55bdc80b0f8b54d6e13fe8d12 upstream. + +Tetsuo Handa has reported that the system might basically livelock in +OOM condition without triggering the OOM killer. + +The issue is caused by internal dependency of the direct reclaim on +vmstat counter updates (via zone_reclaimable) which are performed from +the workqueue context. If all the current workers get assigned to an +allocation request, though, they will be looping inside the allocator +trying to reclaim memory but zone_reclaimable can see stalled numbers so +it will consider a zone reclaimable even though it has been scanned way +too much. WQ concurrency logic will not consider this situation as a +congested workqueue because it relies that worker would have to sleep in +such a situation. This also means that it doesn't try to spawn new +workers or invoke the rescuer thread if the one is assigned to the +queue. + +In order to fix this issue we need to do two things. First we have to +let wq concurrency code know that we are in trouble so we have to do a +short sleep. In order to prevent from issues handled by 0e093d99763e +("writeback: do not sleep on the congestion queue if there are no +congested BDIs or if significant congestion is not being encountered in +the current zone") we limit the sleep only to worker threads which are +the ones of the interest anyway. 
+ +The second thing to do is to create a dedicated workqueue for vmstat and +mark it WQ_MEM_RECLAIM to note it participates in the reclaim and to +have a spare worker thread for it. + +Signed-off-by: Michal Hocko <mhocko@suse.com> +Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Cc: Tejun Heo <tj@kernel.org> +Cc: Cristopher Lameter <clameter@sgi.com> +Cc: Joonsoo Kim <js1304@gmail.com> +Cc: Arkadiusz Miskiewicz <arekm@maven.pl> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + mm/backing-dev.c | 19 ++++++++++++++++--- + mm/vmstat.c | 6 ++++-- + 2 files changed, 20 insertions(+), 5 deletions(-) + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -843,8 +843,9 @@ EXPORT_SYMBOL(congestion_wait); + * jiffies for either a BDI to exit congestion of the given @sync queue + * or a write to complete. + * +- * In the absence of zone congestion, cond_resched() is called to yield +- * the processor if necessary but otherwise does not sleep. ++ * In the absence of zone congestion, a short sleep or a cond_resched is ++ * performed to yield the processor and to allow other subsystems to make ++ * a forward progress. + * + * The return value is 0 if the sleep is for the full timeout. Otherwise, + * it is the number of jiffies that were still remaining when the function +@@ -864,7 +865,19 @@ long wait_iff_congested(struct zone *zon + */ + if (atomic_read(&nr_bdi_congested[sync]) == 0 || + !zone_is_reclaim_congested(zone)) { +- cond_resched(); ++ ++ /* ++ * Memory allocation/reclaim might be called from a WQ ++ * context and the current implementation of the WQ ++ * concurrency control doesn't recognize that a particular ++ * WQ is congested if the worker thread is looping without ++ * ever sleeping. Therefore we have to do a short sleep ++ * here rather than calling cond_resched(). 
++ */ ++ if (current->flags & PF_WQ_WORKER) ++ schedule_timeout(1); ++ else ++ cond_resched(); + + /* In case we scheduled, work out time remaining */ + ret = timeout - (jiffies - start); +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1139,13 +1139,14 @@ static const struct file_operations proc + #endif /* CONFIG_PROC_FS */ + + #ifdef CONFIG_SMP ++static struct workqueue_struct *vmstat_wq; + static DEFINE_PER_CPU(struct delayed_work, vmstat_work); + int sysctl_stat_interval __read_mostly = HZ; + + static void vmstat_update(struct work_struct *w) + { + refresh_cpu_vm_stats(smp_processor_id()); +- schedule_delayed_work(&__get_cpu_var(vmstat_work), ++ queue_delayed_work(vmstat_wq, &__get_cpu_var(vmstat_work), + round_jiffies_relative(sysctl_stat_interval)); + } + +@@ -1154,7 +1155,7 @@ static void __cpuinit start_cpu_timer(in + struct delayed_work *work = &per_cpu(vmstat_work, cpu); + + INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update); +- schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu)); ++ queue_delayed_work_on(cpu, vmstat_wq, work, __round_jiffies_relative(HZ, cpu)); + } + + /* +@@ -1204,6 +1205,7 @@ static int __init setup_vmstat(void) + + register_cpu_notifier(&vmstat_notifier); + ++ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + for_each_online_cpu(cpu) + start_cpu_timer(cpu); + #endif diff --git a/patches/net-possible-use-after-free-in-dst_release.patch b/patches/net-possible-use-after-free-in-dst_release.patch new file mode 100644 index 0000000..6d96343 --- /dev/null +++ b/patches/net-possible-use-after-free-in-dst_release.patch @@ -0,0 +1,38 @@ +From 07a5d38453599052aff0877b16bb9c1585f08609 Mon Sep 17 00:00:00 2001 +From: Francesco Ruggeri <fruggeri@aristanetworks.com> +Date: Wed, 6 Jan 2016 00:18:48 -0800 +Subject: net: possible use after free in dst_release + +commit 07a5d38453599052aff0877b16bb9c1585f08609 upstream. + +dst_release should not access dst->flags after decrementing +__refcnt to 0. 
The dst_entry may be in dst_busy_list and +dst_gc_task may dst_destroy it before dst_release gets a chance +to access dst->flags. + +Fixes: d69bbf88c8d0 ("net: fix a race in dst_release()") +Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") +Signed-off-by: Francesco Ruggeri <fruggeri@arista.com> +Acked-by: Eric Dumazet <edumazet@google.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + net/core/dst.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -269,10 +269,11 @@ void dst_release(struct dst_entry *dst) + { + if (dst) { + int newrefcnt; ++ unsigned short nocache = dst->flags & DST_NOCACHE; + + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); +- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) { ++ if (!newrefcnt && unlikely(nocache)) { + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); diff --git a/patches/ocfs2-fix-bug-when-calculate-new-backup-super.patch b/patches/ocfs2-fix-bug-when-calculate-new-backup-super.patch new file mode 100644 index 0000000..19884d0 --- /dev/null +++ b/patches/ocfs2-fix-bug-when-calculate-new-backup-super.patch @@ -0,0 +1,96 @@ +From 5c9ee4cbf2a945271f25b89b137f2c03bbc3be33 Mon Sep 17 00:00:00 2001 +From: Joseph Qi <joseph.qi@huawei.com> +Date: Tue, 29 Dec 2015 14:54:06 -0800 +Subject: ocfs2: fix BUG when calculate new backup super + +commit 5c9ee4cbf2a945271f25b89b137f2c03bbc3be33 upstream. + +When resizing, it firstly extends the last gd. Once it should backup +super in the gd, it calculates new backup super and update the +corresponding value. + +But it currently doesn't consider the situation that the backup super is +already done. 
And in this case, it still sets the bit in gd bitmap and +then decrease from bg_free_bits_count, which leads to a corrupted gd and +trigger the BUG in ocfs2_block_group_set_bits: + + BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); + +So check whether the backup super is done and then do the updates. + +Signed-off-by: Joseph Qi <joseph.qi@huawei.com> +Reviewed-by: Jiufei Xue <xuejiufei@huawei.com> +Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> +Cc: Mark Fasheh <mfasheh@suse.de> +Cc: Joel Becker <jlbec@evilplan.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + fs/ocfs2/resize.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/fs/ocfs2/resize.c ++++ b/fs/ocfs2/resize.c +@@ -56,11 +56,12 @@ static u16 ocfs2_calc_new_backup_super(s + int new_clusters, + u32 first_new_cluster, + u16 cl_cpg, ++ u16 old_bg_clusters, + int set) + { + int i; + u16 backups = 0; +- u32 cluster; ++ u32 cluster, lgd_cluster; + u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); + + for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { +@@ -73,6 +74,12 @@ static u16 ocfs2_calc_new_backup_super(s + else if (gd_blkno > lgd_blkno) + break; + ++ /* check if already done backup super */ ++ lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno); ++ lgd_cluster += old_bg_clusters; ++ if (lgd_cluster >= cluster) ++ continue; ++ + if (set) + ocfs2_set_bit(cluster % cl_cpg, + (unsigned long *)gd->bg_bitmap); +@@ -101,6 +108,7 @@ static int ocfs2_update_last_group_and_i + u16 chain, num_bits, backups = 0; + u16 cl_bpc = le16_to_cpu(cl->cl_bpc); + u16 cl_cpg = le16_to_cpu(cl->cl_cpg); ++ u16 old_bg_clusters; + + trace_ocfs2_update_last_group_and_inode(new_clusters, + first_new_cluster); +@@ -114,6 +122,7 @@ static int ocfs2_update_last_group_and_i + + group = (struct 
ocfs2_group_desc *)group_bh->b_data; + ++ old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc; + /* update the group first. */ + num_bits = new_clusters * cl_bpc; + le16_add_cpu(&group->bg_bits, num_bits); +@@ -129,7 +138,7 @@ static int ocfs2_update_last_group_and_i + group, + new_clusters, + first_new_cluster, +- cl_cpg, 1); ++ cl_cpg, old_bg_clusters, 1); + le16_add_cpu(&group->bg_free_bits_count, -1 * backups); + } + +@@ -169,7 +178,7 @@ out_rollback: + group, + new_clusters, + first_new_cluster, +- cl_cpg, 0); ++ cl_cpg, old_bg_clusters, 0); + le16_add_cpu(&group->bg_free_bits_count, backups); + le16_add_cpu(&group->bg_bits, -1 * num_bits); + le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); diff --git a/patches/parisc-fix-syscall-restarts.patch b/patches/parisc-fix-syscall-restarts.patch new file mode 100644 index 0000000..a7b928e --- /dev/null +++ b/patches/parisc-fix-syscall-restarts.patch @@ -0,0 +1,131 @@ +From 71a71fb5374a23be36a91981b5614590b9e722c3 Mon Sep 17 00:00:00 2001 +From: Helge Deller <deller@gmx.de> +Date: Mon, 21 Dec 2015 10:03:30 +0100 +Subject: parisc: Fix syscall restarts + +commit 71a71fb5374a23be36a91981b5614590b9e722c3 upstream. + +On parisc syscalls which are interrupted by signals sometimes failed to +restart and instead returned -ENOSYS which in the worst case led to +userspace crashes. +A similar problem existed on MIPS and was fixed by commit e967ef02 +("MIPS: Fix restart of indirect syscalls"). + +On parisc the current syscall restart code assumes that all syscall +callers load the syscall number in the delay slot of the ble +instruction. That's how it is e.g. done in the unistd.h header file: + ble 0x100(%sr2, %r0) + ldi #syscall_nr, %r20 +Because of that assumption the current code never restored %r20 before +returning to userspace. 
+ +This assumption is at least not true for code which uses the glibc +syscall() function, which instead uses this syntax: + ble 0x100(%sr2, %r0) + copy regX, %r20 +where regX depend on how the compiler optimizes the code and register +usage. + +This patch fixes this problem by adding code to analyze how the syscall +number is loaded in the delay branch and - if needed - copy the syscall +number to regX prior returning to userspace for the syscall restart. + +Signed-off-by: Helge Deller <deller@gmx.de> +Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + arch/parisc/kernel/signal.c | 67 ++++++++++++++++++++++++++++++++++---------- + 1 file changed, 52 insertions(+), 15 deletions(-) + +--- a/arch/parisc/kernel/signal.c ++++ b/arch/parisc/kernel/signal.c +@@ -468,6 +468,55 @@ handle_signal(unsigned long sig, siginfo + return 1; + } + ++/* ++ * Check how the syscall number gets loaded into %r20 within ++ * the delay branch in userspace and adjust as needed. ++ */ ++ ++static void check_syscallno_in_delay_branch(struct pt_regs *regs) ++{ ++ u32 opcode, source_reg; ++ u32 __user *uaddr; ++ int err; ++ ++ /* Usually we don't have to restore %r20 (the system call number) ++ * because it gets loaded in the delay slot of the branch external ++ * instruction via the ldi instruction. ++ * In some cases a register-to-register copy instruction might have ++ * been used instead, in which case we need to copy the syscall ++ * number into the source register before returning to userspace. ++ */ ++ ++ /* A syscall is just a branch, so all we have to do is fiddle the ++ * return pointer so that the ble instruction gets executed again. 
++ */ ++ regs->gr[31] -= 8; /* delayed branching */ ++ ++ /* Get assembler opcode of code in delay branch */ ++ uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4); ++ err = get_user(opcode, uaddr); ++ if (err) ++ return; ++ ++ /* Check if delay branch uses "ldi int,%r20" */ ++ if ((opcode & 0xffff0000) == 0x34140000) ++ return; /* everything ok, just return */ ++ ++ /* Check if delay branch uses "nop" */ ++ if (opcode == INSN_NOP) ++ return; ++ ++ /* Check if delay branch uses "copy %rX,%r20" */ ++ if ((opcode & 0xffe0ffff) == 0x08000254) { ++ source_reg = (opcode >> 16) & 31; ++ regs->gr[source_reg] = regs->gr[20]; ++ return; ++ } ++ ++ pr_warn("syscall restart: %s (pid %d): unexpected opcode 0x%08x\n", ++ current->comm, task_pid_nr(current), opcode); ++} ++ + static inline void + syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) + { +@@ -489,10 +538,7 @@ syscall_restart(struct pt_regs *regs, st + } + /* fallthrough */ + case -ERESTARTNOINTR: +- /* A syscall is just a branch, so all +- * we have to do is fiddle the return pointer. +- */ +- regs->gr[31] -= 8; /* delayed branching */ ++ check_syscallno_in_delay_branch(regs); + /* Preserve original r28. */ + regs->gr[28] = regs->orig_r28; + break; +@@ -543,18 +589,9 @@ insert_restart_trampoline(struct pt_regs + } + case -ERESTARTNOHAND: + case -ERESTARTSYS: +- case -ERESTARTNOINTR: { +- /* Hooray for delayed branching. We don't +- * have to restore %r20 (the system call +- * number) because it gets loaded in the delay +- * slot of the branch external instruction. +- */ +- regs->gr[31] -= 8; +- /* Preserve original r28. 
*/ +- regs->gr[28] = regs->orig_r28; +- ++ case -ERESTARTNOINTR: ++ check_syscallno_in_delay_branch(regs); + return; +- } + default: + break; + } diff --git a/patches/parisc-iommu-fix-panic-due-to-trying-to-allocate-too-large-region.patch b/patches/parisc-iommu-fix-panic-due-to-trying-to-allocate-too-large-region.patch new file mode 100644 index 0000000..03ab8b2 --- /dev/null +++ b/patches/parisc-iommu-fix-panic-due-to-trying-to-allocate-too-large-region.patch @@ -0,0 +1,124 @@ +From e46e31a3696ae2d66f32c207df3969613726e636 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka <mpatocka@redhat.com> +Date: Mon, 30 Nov 2015 14:47:46 -0500 +Subject: parisc iommu: fix panic due to trying to allocate too large region + +commit e46e31a3696ae2d66f32c207df3969613726e636 upstream. + +When using the Promise TX2+ SATA controller on PA-RISC, the system often +crashes with kernel panic, for example just writing data with the dd +utility will make it crash. + +Kernel panic - not syncing: drivers/parisc/sba_iommu.c: I/O MMU @ 000000000000a000 is out of mapping resources + +CPU: 0 PID: 18442 Comm: mkspadfs Not tainted 4.4.0-rc2 #2 +Backtrace: + [<000000004021497c>] show_stack+0x14/0x20 + [<0000000040410bf0>] dump_stack+0x88/0x100 + [<000000004023978c>] panic+0x124/0x360 + [<0000000040452c18>] sba_alloc_range+0x698/0x6a0 + [<0000000040453150>] sba_map_sg+0x260/0x5b8 + [<000000000c18dbb4>] ata_qc_issue+0x264/0x4a8 [libata] + [<000000000c19535c>] ata_scsi_translate+0xe4/0x220 [libata] + [<000000000c19a93c>] ata_scsi_queuecmd+0xbc/0x320 [libata] + [<0000000040499bbc>] scsi_dispatch_cmd+0xfc/0x130 + [<000000004049da34>] scsi_request_fn+0x6e4/0x970 + [<00000000403e95a8>] __blk_run_queue+0x40/0x60 + [<00000000403e9d8c>] blk_run_queue+0x3c/0x68 + [<000000004049a534>] scsi_run_queue+0x2a4/0x360 + [<000000004049be68>] scsi_end_request+0x1a8/0x238 + [<000000004049de84>] scsi_io_completion+0xfc/0x688 + [<0000000040493c74>] scsi_finish_command+0x17c/0x1d0 + +The cause of the crash is not exhaustion 
of the IOMMU space, there are +plenty of free pages. The function sba_alloc_range is called with size +0x11000, thus the pages_needed variable is 0x11. The function +sba_search_bitmap is called with bits_wanted 0x11 and boundary size is +0x10 (because dma_get_seg_boundary(dev) returns 0xffff). + +The function sba_search_bitmap attempts to allocate 17 pages that must not +cross 16-page boundary - it can't satisfy this requirement +(iommu_is_span_boundary always returns true) and fails even if there are +many free entries in the IOMMU space. + +How did it happen that we try to allocate 17 pages that don't cross +16-page boundary? The cause is in the function iommu_coalesce_chunks. This +function tries to coalesce adjacent entries in the scatterlist. The +function does several checks if it may coalesce one entry with the next, +one of those checks is this: + + if (startsg->length + dma_len > max_seg_size) + break; + +When it finishes coalescing adjacent entries, it allocates the mapping: + +sg_dma_len(contig_sg) = dma_len; +dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); +sg_dma_address(contig_sg) = + PIDE_FLAG + | (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT) + | dma_offset; + +It is possible that (startsg->length + dma_len > max_seg_size) is false +(we are just near the 0x10000 max_seg_size boundary), so the function +decides to coalesce this entry with the next entry. When the coalescing +succeeds, the function performs + dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); +And now, because of non-zero dma_offset, dma_len is greater than 0x10000. +iommu_alloc_range (a pointer to sba_alloc_range) is called and it attempts +to allocate 17 pages for a device that must not cross 16-page boundary. + +To fix the bug, we must make sure that dma_len after addition of +dma_offset and alignment doesn't cross the segment boundary. I.e.
change + if (startsg->length + dma_len > max_seg_size) + break; +to + if (ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > max_seg_size) + break; + +This patch makes this change (it precalculates max_seg_boundary at the +beginning of the function iommu_coalesce_chunks). I also added a check +that the mapping length doesn't exceed dma_get_seg_boundary(dev) (it is +not needed for Promise TX2+ SATA, but it may be needed for other devices +that have dma_get_seg_boundary lower than dma_get_max_seg_size). + +Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> +Signed-off-by: Helge Deller <deller@gmx.de> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/parisc/iommu-helpers.h | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/drivers/parisc/iommu-helpers.h ++++ b/drivers/parisc/iommu-helpers.h +@@ -104,7 +104,11 @@ iommu_coalesce_chunks(struct ioc *ioc, s + struct scatterlist *contig_sg; /* contig chunk head */ + unsigned long dma_offset, dma_len; /* start/len of DMA stream */ + unsigned int n_mappings = 0; +- unsigned int max_seg_size = dma_get_max_seg_size(dev); ++ unsigned int max_seg_size = min(dma_get_max_seg_size(dev), ++ (unsigned)DMA_CHUNK_SIZE); ++ unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1; ++ if (max_seg_boundary) /* check if the addition above didn't overflow */ ++ max_seg_size = min(max_seg_size, max_seg_boundary); + + while (nents > 0) { + +@@ -139,14 +143,11 @@ iommu_coalesce_chunks(struct ioc *ioc, s + + /* + ** First make sure current dma stream won't +- ** exceed DMA_CHUNK_SIZE if we coalesce the ++ ** exceed max_seg_size if we coalesce the + ** next entry. 
+ */ +- if(unlikely(ALIGN(dma_len + dma_offset + startsg->length, +- IOVP_SIZE) > DMA_CHUNK_SIZE)) +- break; +- +- if (startsg->length + dma_len > max_seg_size) ++ if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > ++ max_seg_size)) + break; + + /* diff --git a/patches/rfkill-copy-the-name-into-the-rfkill-struct.patch b/patches/rfkill-copy-the-name-into-the-rfkill-struct.patch new file mode 100644 index 0000000..502b51b --- /dev/null +++ b/patches/rfkill-copy-the-name-into-the-rfkill-struct.patch @@ -0,0 +1,56 @@ +From b7bb110008607a915298bf0f47d25886ecb94477 Mon Sep 17 00:00:00 2001 +From: Johannes Berg <johannes.berg@intel.com> +Date: Thu, 10 Dec 2015 10:37:51 +0100 +Subject: rfkill: copy the name into the rfkill struct + +commit b7bb110008607a915298bf0f47d25886ecb94477 upstream. + +Some users of rfkill, like NFC and cfg80211, use a dynamic name when +allocating rfkill, in those cases dev_name(). Therefore, the pointer +passed to rfkill_alloc() might not be valid forever, I specifically +found the case that the rfkill name was quite obviously an invalid +pointer (or at least garbage) when the wiphy had been renamed. + +Fix this by making a copy of the rfkill name in rfkill_alloc(). 
+ +Signed-off-by: Johannes Berg <johannes.berg@intel.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + net/rfkill/core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/rfkill/core.c ++++ b/net/rfkill/core.c +@@ -51,7 +51,6 @@ + struct rfkill { + spinlock_t lock; + +- const char *name; + enum rfkill_type type; + + unsigned long state; +@@ -75,6 +74,7 @@ struct rfkill { + struct delayed_work poll_work; + struct work_struct uevent_work; + struct work_struct sync_work; ++ char name[]; + }; + #define to_rfkill(d) container_of(d, struct rfkill, dev) + +@@ -849,14 +849,14 @@ struct rfkill * __must_check rfkill_allo + if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES)) + return NULL; + +- rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); ++ rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL); + if (!rfkill) + return NULL; + + spin_lock_init(&rfkill->lock); + INIT_LIST_HEAD(&rfkill->node); + rfkill->type = type; +- rfkill->name = name; ++ strcpy(rfkill->name, name); + rfkill->ops = ops; + rfkill->data = ops_data; + diff --git a/patches/sata_sil-disable-trim.patch b/patches/sata_sil-disable-trim.patch new file mode 100644 index 0000000..86c935c --- /dev/null +++ b/patches/sata_sil-disable-trim.patch @@ -0,0 +1,48 @@ +From d98f1cd0a3b70ea91f1dfda3ac36c3b2e1a4d5e2 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka <mpatocka@redhat.com> +Date: Thu, 26 Nov 2015 12:00:59 -0500 +Subject: sata_sil: disable trim + +commit d98f1cd0a3b70ea91f1dfda3ac36c3b2e1a4d5e2 upstream. + +When I connect an Intel SSD to SATA SIL controller (PCI ID 1095:3114), any +TRIM command results in I/O errors being reported in the log. There is +other similar error reported with TRIM and the SIL controller: +https://bugs.centos.org/view.php?id=5880 + +Apparently the controller doesn't support TRIM commands. This patch +disables TRIM support on the SATA SIL controller. 
+ +ata7.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0 +ata7.00: BMDMA2 stat 0x50001 +ata7.00: failed command: DATA SET MANAGEMENT +ata7.00: cmd 06/01:01:00:00:00/00:00:00:00:00/a0 tag 0 dma 512 out + res 51/04:01:00:00:00/00:00:00:00:00/a0 Emask 0x1 (device error) +ata7.00: status: { DRDY ERR } +ata7.00: error: { ABRT } +ata7.00: device reported invalid CHS sector 0 +sd 8:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE +sd 8:0:0:0: [sdb] tag#0 Sense Key : Illegal Request [current] [descriptor] +sd 8:0:0:0: [sdb] tag#0 Add. Sense: Unaligned write command +sd 8:0:0:0: [sdb] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 21 95 88 00 20 00 00 00 00 +blk_update_request: I/O error, dev sdb, sector 2200968 + +Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> +Signed-off-by: Tejun Heo <tj@kernel.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/ata/sata_sil.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/ata/sata_sil.c ++++ b/drivers/ata/sata_sil.c +@@ -631,6 +631,9 @@ static void sil_dev_config(struct ata_de + unsigned int n, quirks = 0; + unsigned char model_num[ATA_ID_PROD_LEN + 1]; + ++ /* This controller doesn't support trim */ ++ dev->horkage |= ATA_HORKAGE_NOTRIM; ++ + ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num)); + + for (n = 0; sil_blacklist[n].product; n++) diff --git a/patches/scripts-recordmcount-break-hardlinks.patch b/patches/scripts-recordmcount-break-hardlinks.patch new file mode 100644 index 0000000..4a22bcd --- /dev/null +++ b/patches/scripts-recordmcount-break-hardlinks.patch @@ -0,0 +1,43 @@ +From dd39a26538e37f6c6131e829a4a510787e43c783 Mon Sep 17 00:00:00 2001 +From: Russell King <rmk+kernel@arm.linux.org.uk> +Date: Fri, 11 Dec 2015 12:09:03 +0000 +Subject: scripts: recordmcount: break hardlinks + +commit dd39a26538e37f6c6131e829a4a510787e43c783 upstream. + +recordmcount edits the file in-place, which can cause problems when +using ccache in hardlink mode. 
Arrange for recordmcount to break a +hardlinked object. + +Link: http://lkml.kernel.org/r/E1a7MVT-0000et-62@rmk-PC.arm.linux.org.uk + +Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> +Signed-off-by: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + scripts/recordmcount.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/scripts/recordmcount.c ++++ b/scripts/recordmcount.c +@@ -182,6 +182,20 @@ static void *mmap_file(char const *fname + addr = umalloc(sb.st_size); + uread(fd_map, addr, sb.st_size); + } ++ if (sb.st_nlink != 1) { ++ /* file is hard-linked, break the hard link */ ++ close(fd_map); ++ if (unlink(fname) < 0) { ++ perror(fname); ++ fail_file(); ++ } ++ fd_map = open(fname, O_RDWR | O_CREAT, sb.st_mode); ++ if (fd_map < 0) { ++ perror(fname); ++ fail_file(); ++ } ++ uwrite(fd_map, addr, sb.st_size); ++ } + return addr; + } + diff --git a/patches/sctp-start-t5-timer-only-when-peer-rwnd-is-0-and-local-state-is-shutdown_pending.patch b/patches/sctp-start-t5-timer-only-when-peer-rwnd-is-0-and-local-state-is-shutdown_pending.patch new file mode 100644 index 0000000..a6ee7a0 --- /dev/null +++ b/patches/sctp-start-t5-timer-only-when-peer-rwnd-is-0-and-local-state-is-shutdown_pending.patch @@ -0,0 +1,69 @@ +From: lucien <lucien.xin@gmail.com> +Date: Sat, 5 Dec 2015 15:35:36 +0800 +Subject: sctp: start t5 timer only when peer rwnd is 0 and local state is + SHUTDOWN_PENDING + +commit 8a0d19c5ed417c78d03f4e0fa7215e58c40896d8 upstream. + +when A sends a data to B, then A close() and enter into SHUTDOWN_PENDING +state, if B neither claim his rwnd is 0 nor send SACK for this data, A +will keep retransmitting this data until t5 timeout, Max.Retrans times +can't work anymore, which is bad. 
+ +if B's rwnd is not 0, it should send abort after Max.Retrans times, only +when B's rwnd == 0 and A's retransmitting beyonds Max.Retrans times, A +will start t5 timer, which is also commit f8d960524328 ("sctp: Enforce +retransmission limit during shutdown") means, but it lacks the condition +peer rwnd == 0. + +so fix it by adding a bit (zero_window_announced) in peer to record if +the last rwnd is 0. If it was, zero_window_announced will be set. and use +this bit to decide if start t5 timer when local.state is SHUTDOWN_PENDING. + +Fixes: commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown") +Signed-off-by: Xin Long <lucien.xin@gmail.com> +Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +[bwh: Backported to 3.2: change sack_needed to bitfield as done earlier upstream] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + include/net/sctp/structs.h | 3 ++- + net/sctp/outqueue.c | 1 + + net/sctp/sm_statefuns.c | 3 ++- + 3 files changed, 5 insertions(+), 2 deletions(-) + +--- a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -1587,7 +1587,8 @@ struct sctp_association { + * : order. When DATA chunks are out of order, + * : SACK's are not delayed (see Section 6). + */ +- __u8 sack_needed; /* Do we need to sack the peer? */ ++ __u8 sack_needed:1, /* Do we need to sack the peer? */ ++ zero_window_announced:1; + __u32 sack_cnt; + + /* These are capabilities which our peer advertised. 
*/ +--- a/net/sctp/outqueue.c ++++ b/net/sctp/outqueue.c +@@ -1265,6 +1265,7 @@ int sctp_outq_sack(struct sctp_outq *q, + */ + + sack_a_rwnd = ntohl(sack->a_rwnd); ++ asoc->peer.zero_window_announced = !sack_a_rwnd; + outstanding = q->outstanding_bytes; + + if (outstanding < sack_a_rwnd) +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -5299,7 +5299,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx( + SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + + if (asoc->overall_error_count >= asoc->max_retrans) { +- if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { ++ if (asoc->peer.zero_window_announced && ++ asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { + /* + * We are here likely because the receiver had its rwnd + * closed for a while and we have not been able to diff --git a/patches/ser_gigaset-fix-deallocation-of-platform-device-structure.patch b/patches/ser_gigaset-fix-deallocation-of-platform-device-structure.patch new file mode 100644 index 0000000..76fea8d --- /dev/null +++ b/patches/ser_gigaset-fix-deallocation-of-platform-device-structure.patch @@ -0,0 +1,51 @@ +From 4c5e354a974214dfb44cd23fa0429327693bc3ea Mon Sep 17 00:00:00 2001 +From: Tilman Schmidt <tilman@imap.cc> +Date: Tue, 15 Dec 2015 18:11:30 +0100 +Subject: ser_gigaset: fix deallocation of platform device structure + +commit 4c5e354a974214dfb44cd23fa0429327693bc3ea upstream. + +When shutting down the device, the struct ser_cardstate must not be +kfree()d immediately after the call to platform_device_unregister() +since the embedded struct platform_device is still in use. +Move the kfree() call to the release method instead. + +Signed-off-by: Tilman Schmidt <tilman@imap.cc> +Fixes: 2869b23e4b95 ("drivers/isdn/gigaset: new M101 driver (v2)") +Reported-by: Sasha Levin <sasha.levin@oracle.com> +Signed-off-by: Paul Bolle <pebolle@tiscali.nl> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/isdn/gigaset/ser-gigaset.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/isdn/gigaset/ser-gigaset.c ++++ b/drivers/isdn/gigaset/ser-gigaset.c +@@ -371,19 +371,23 @@ static void gigaset_freecshw(struct card + tasklet_kill(&cs->write_tasklet); + if (!cs->hw.ser) + return; +- dev_set_drvdata(&cs->hw.ser->dev.dev, NULL); + platform_device_unregister(&cs->hw.ser->dev); +- kfree(cs->hw.ser); +- cs->hw.ser = NULL; + } + + static void gigaset_device_release(struct device *dev) + { + struct platform_device *pdev = to_platform_device(dev); ++ struct cardstate *cs = dev_get_drvdata(dev); + + /* adapted from platform_device_release() in drivers/base/platform.c */ + kfree(dev->platform_data); + kfree(pdev->resource); ++ ++ if (!cs) ++ return; ++ dev_set_drvdata(dev, NULL); ++ kfree(cs->hw.ser); ++ cs->hw.ser = NULL; + } + + /* diff --git a/patches/series b/patches/series index d0f97a2..ce5dfe8 100644 --- a/patches/series +++ b/patches/series @@ -58,3 +58,53 @@ wan-x25-fix-use-after-free-in-x25_asy_open_tty.patch sched-core-clear-the-root_domain-cpumasks-in-init_rootdomain.patch x86-signal-fix-restart_syscall-number-for-x32-tasks.patch fix-sysvfs-symlinks.patch +fuse-break-infinite-loop-in-fuse_fill_write_pages.patch +usb-cp210x-remove-cp2110-id-from-compatibility-list.patch +ext4-fix-handling-of-extended-tv_sec.patch +jbd2-fix-unreclaimed-pages-after-truncate-in-data-journal-mode.patch +drm-ttm-fixed-a-read-write-lock-imbalance.patch +ahci-fix-softreset-failed-issue-of-port-multiplier.patch +sata_sil-disable-trim.patch +usb-whci-hcd-add-check-for-dma-mapping-error.patch +dm-btree-fix-leak-of-bufio-backed-block-in-btree_split_sibling-error-path.patch +usb-xhci-fix-config-fail-of-fs-hub-behind-a-hs-hub-with-mtt.patch +alsa-rme96-fix-unexpected-volume-reset-after-rate-changes.patch 
+sctp-start-t5-timer-only-when-peer-rwnd-is-0-and-local-state-is-shutdown_pending.patch +9p-evict_inode-should-kick-out-i_data-not-i_mapping.patch +crypto-skcipher-copy-iv-from-desc-even-for-0-len-walks.patch +rfkill-copy-the-name-into-the-rfkill-struct.patch +dm-btree-fix-bufio-buffer-leaks-in-dm_btree_del-error-path.patch +ses-fix-problems-with-simple-enclosures.patch +vgaarb-fix-signal-handling-in-vga_get.patch +ses-fix-additional-element-traversal-bug.patch +parisc-iommu-fix-panic-due-to-trying-to-allocate-too-large-region.patch +mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch +mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch +tty-fix-gpf-in-flush_to_ldisc.patch +genirq-prevent-chip-buslock-deadlock.patch +sh_eth-fix-tx-buffer-byte-swapping.patch +arm-8471-1-need-to-save-restore-arm-register-r11-when-it-is-corrupted.patch +misdn-fix-a-loop-count.patch +ser_gigaset-fix-deallocation-of-platform-device-structure.patch +spi-fix-parent-device-reference-leak.patch +scripts-recordmcount-break-hardlinks.patch +ftrace-scripts-have-recordmcount-copy-the-object-file.patch +xen-add-ring_copy_request.patch +xen-netback-don-t-use-last-request-to-determine-minimum-tx-credit.patch +xen-netback-use-ring_copy_request-throughout.patch +xen-blkback-only-read-request-operation-from-shared-ring-once.patch +xen-pciback-save-xen_pci_op-commands-before-processing-it.patch +xen-pciback-return-error-on-xen_pci_op_enable_msi-when-device-has-msi-or-msi-x-enabled.patch +xen-pciback-return-error-on-xen_pci_op_enable_msix-when-device-has-msi-or-msi-x-enabled.patch +xen-pciback-do-not-install-an-irq-handler-for-msi-interrupts.patch +xen-pciback-for-xen_pci_op_disable_msi-x-only-disable-if-device-has-msi-x-enabled.patch +xen-pciback-don-t-allow-msi-x-ops-if-pci_command_memory-is-not-set.patch +usb-ipaq.c-fix-a-timeout-loop.patch +usb-fix-invalid-memory-access-in-hub_activate.patch +keys-fix-race-between-read-and-revoke.patch 
+parisc-fix-syscall-restarts.patch +ipv6-addrlabel-fix-ip6addrlbl_get.patch +ocfs2-fix-bug-when-calculate-new-backup-super.patch +mm-memory_hotplug.c-check-for-missing-sections-in-test_pages_in_a_zone.patch +ftrace-scripts-fix-incorrect-use-of-sprintf-in-recordmcount.patch +net-possible-use-after-free-in-dst_release.patch diff --git a/patches/ses-fix-additional-element-traversal-bug.patch b/patches/ses-fix-additional-element-traversal-bug.patch new file mode 100644 index 0000000..c63046b --- /dev/null +++ b/patches/ses-fix-additional-element-traversal-bug.patch @@ -0,0 +1,58 @@ +From 5e1033561da1152c57b97ee84371dba2b3d64c25 Mon Sep 17 00:00:00 2001 +From: James Bottomley <James.Bottomley@HansenPartnership.com> +Date: Fri, 11 Dec 2015 09:16:38 -0800 +Subject: ses: fix additional element traversal bug + +commit 5e1033561da1152c57b97ee84371dba2b3d64c25 upstream. + +KASAN found that our additional element processing scripts drop off +the end of the VPD page into unallocated space. The reason is that +not every element has additional information but our traversal +routines think they do, leading to them expecting far more additional +information than is present. 
Fix this by adding a gate to the +traversal routine so that it only processes elements that are expected +to have additional information (list is in SES-2 section 6.1.13.1: +Additional Element Status diagnostic page overview) + +Reported-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> +Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> +Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/scsi/ses.c | 10 +++++++++- + include/linux/enclosure.h | 4 ++++ + 2 files changed, 13 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -454,7 +454,15 @@ static void ses_enclosure_data_process(s + if (desc_ptr) + desc_ptr += len; + +- if (addl_desc_ptr) ++ if (addl_desc_ptr && ++ /* only find additional descriptions for specific devices */ ++ (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE || ++ type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE || ++ type_ptr[0] == ENCLOSURE_COMPONENT_SAS_EXPANDER || ++ /* these elements are optional */ ++ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT || ++ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT || ++ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) + addl_desc_ptr += addl_desc_ptr[1] + 2; + + } +--- a/include/linux/enclosure.h ++++ b/include/linux/enclosure.h +@@ -29,7 +29,11 @@ + /* A few generic types ... 
taken from ses-2 */ + enum enclosure_component_type { + ENCLOSURE_COMPONENT_DEVICE = 0x01, ++ ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS = 0x07, ++ ENCLOSURE_COMPONENT_SCSI_TARGET_PORT = 0x14, ++ ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT = 0x15, + ENCLOSURE_COMPONENT_ARRAY_DEVICE = 0x17, ++ ENCLOSURE_COMPONENT_SAS_EXPANDER = 0x18, + }; + + /* ses-2 common element status */ diff --git a/patches/ses-fix-problems-with-simple-enclosures.patch b/patches/ses-fix-problems-with-simple-enclosures.patch new file mode 100644 index 0000000..75930bc --- /dev/null +++ b/patches/ses-fix-problems-with-simple-enclosures.patch @@ -0,0 +1,64 @@ +From 3417c1b5cb1fdc10261dbed42b05cc93166a78fd Mon Sep 17 00:00:00 2001 +From: James Bottomley <James.Bottomley@HansenPartnership.com> +Date: Tue, 8 Dec 2015 09:00:31 -0800 +Subject: ses: Fix problems with simple enclosures + +commit 3417c1b5cb1fdc10261dbed42b05cc93166a78fd upstream. + +Simple enclosure implementations (mostly USB) are allowed to return only +page 8 to every diagnostic query. That really confuses our +implementation because we assume the return is the page we asked for and +end up doing incorrect offsets based on bogus information leading to +accesses outside of allocated ranges. Fix that by checking the page +code of the return and giving an error if it isn't the one we asked for. +This should fix reported bugs with USB storage by simply refusing to +attach to enclosures that behave like this. It's also good defensive +practise now that we're starting to see more USB enclosures. + +Reported-by: Andrea Gelmini <andrea.gelmini@gelma.net> +Reviewed-by: Ewan D. 
Milne <emilne@redhat.com> +Reviewed-by: Tomas Henzl <thenzl@redhat.com> +Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/scsi/ses.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/ses.c ++++ b/drivers/scsi/ses.c +@@ -70,6 +70,7 @@ static int ses_probe(struct device *dev) + static int ses_recv_diag(struct scsi_device *sdev, int page_code, + void *buf, int bufflen) + { ++ int ret; + unsigned char cmd[] = { + RECEIVE_DIAGNOSTIC, + 1, /* Set PCV bit */ +@@ -78,9 +79,26 @@ static int ses_recv_diag(struct scsi_dev + bufflen & 0xff, + 0 + }; ++ unsigned char recv_page_code; + +- return scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, ++ ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, + NULL, SES_TIMEOUT, SES_RETRIES, NULL); ++ if (unlikely(!ret)) ++ return ret; ++ ++ recv_page_code = ((unsigned char *)buf)[0]; ++ ++ if (likely(recv_page_code == page_code)) ++ return ret; ++ ++ /* successful diagnostic but wrong page code. This happens to some ++ * USB devices, just print a message and pretend there was an error */ ++ ++ sdev_printk(KERN_ERR, sdev, ++ "Wrong diagnostic page; asked for %d got %u\n", ++ page_code, recv_page_code); ++ ++ return -EINVAL; + } + + static int ses_send_diag(struct scsi_device *sdev, int page_code, diff --git a/patches/sh_eth-fix-tx-buffer-byte-swapping.patch b/patches/sh_eth-fix-tx-buffer-byte-swapping.patch new file mode 100644 index 0000000..0a28e3a --- /dev/null +++ b/patches/sh_eth-fix-tx-buffer-byte-swapping.patch @@ -0,0 +1,31 @@ +From 3e2309937f1e5d538ff13da5fb8de41196927c61 Mon Sep 17 00:00:00 2001 +From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> +Date: Sun, 13 Dec 2015 21:27:04 +0300 +Subject: sh_eth: fix TX buffer byte-swapping + +commit 3e2309937f1e5d538ff13da5fb8de41196927c61 upstream. 
+ +For the little-endian SH771x kernels the driver has to byte-swap the RX/TX +buffers, however yet unset physical address from the TX descriptor is used +to call sh_eth_soft_swap(). Use 'skb->data' instead... + +Fixes: 31fcb99d9958 ("net: sh_eth: remove __flush_purge_region") +Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/net/ethernet/renesas/sh_eth.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -1513,8 +1513,7 @@ static int sh_eth_start_xmit(struct sk_b + txdesc = &mdp->tx_ring[entry]; + /* soft swap. */ + if (!mdp->cd->hw_swap) +- sh_eth_soft_swap(phys_to_virt(ALIGN(txdesc->addr, 4)), +- skb->len + 2); ++ sh_eth_soft_swap(PTR_ALIGN(skb->data, 4), skb->len + 2); + txdesc->addr = dma_map_single(&ndev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (skb->len < ETHERSMALL) diff --git a/patches/spi-fix-parent-device-reference-leak.patch b/patches/spi-fix-parent-device-reference-leak.patch new file mode 100644 index 0000000..d9d8e5b --- /dev/null +++ b/patches/spi-fix-parent-device-reference-leak.patch @@ -0,0 +1,33 @@ +From 157f38f993919b648187ba341bfb05d0e91ad2f6 Mon Sep 17 00:00:00 2001 +From: Johan Hovold <johan@kernel.org> +Date: Mon, 14 Dec 2015 16:16:19 +0100 +Subject: spi: fix parent-device reference leak + +commit 157f38f993919b648187ba341bfb05d0e91ad2f6 upstream. + +Fix parent-device reference leak due to SPI-core taking an unnecessary +reference to the parent when allocating the master structure, a +reference that was never released. + +Note that driver core takes its own reference to the parent when the +master device is registered.
+ +Fixes: 49dce689ad4e ("spi doesn't need class_device") +Signed-off-by: Johan Hovold <johan@kernel.org> +Signed-off-by: Mark Brown <broonie@kernel.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/spi/spi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -846,7 +846,7 @@ struct spi_master *spi_alloc_master(stru + + device_initialize(&master->dev); + master->dev.class = &spi_master_class; +- master->dev.parent = get_device(dev); ++ master->dev.parent = dev; + spi_master_set_devdata(master, &master[1]); + + return master; diff --git a/patches/tty-fix-gpf-in-flush_to_ldisc.patch b/patches/tty-fix-gpf-in-flush_to_ldisc.patch new file mode 100644 index 0000000..f0939d1 --- /dev/null +++ b/patches/tty-fix-gpf-in-flush_to_ldisc.patch @@ -0,0 +1,67 @@ +From 9ce119f318ba1a07c29149301f1544b6c4bea52a Mon Sep 17 00:00:00 2001 +From: Peter Hurley <peter@hurleysoftware.com> +Date: Fri, 27 Nov 2015 14:25:08 -0500 +Subject: tty: Fix GPF in flush_to_ldisc() + +commit 9ce119f318ba1a07c29149301f1544b6c4bea52a upstream. + +A line discipline which does not define a receive_buf() method can +cause a GPF if data is ever received [1]. Oddly, this was known +to the author of n_tracesink in 2011, but never fixed.
+ +[1] GPF report + BUG: unable to handle kernel NULL pointer dereference at (null) + IP: [< (null)>] (null) + PGD 3752d067 PUD 37a7b067 PMD 0 + Oops: 0010 [#1] SMP KASAN + Modules linked in: + CPU: 2 PID: 148 Comm: kworker/u10:2 Not tainted 4.4.0-rc2+ #51 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + Workqueue: events_unbound flush_to_ldisc + task: ffff88006da94440 ti: ffff88006db60000 task.ti: ffff88006db60000 + RIP: 0010:[<0000000000000000>] [< (null)>] (null) + RSP: 0018:ffff88006db67b50 EFLAGS: 00010246 + RAX: 0000000000000102 RBX: ffff88003ab32f88 RCX: 0000000000000102 + RDX: 0000000000000000 RSI: ffff88003ab330a6 RDI: ffff88003aabd388 + RBP: ffff88006db67c48 R08: ffff88003ab32f9c R09: ffff88003ab31fb0 + R10: ffff88003ab32fa8 R11: 0000000000000000 R12: dffffc0000000000 + R13: ffff88006db67c20 R14: ffffffff863df820 R15: ffff88003ab31fb8 + FS: 0000000000000000(0000) GS:ffff88006dc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: 0000000000000000 CR3: 0000000037938000 CR4: 00000000000006e0 + Stack: + ffffffff829f46f1 ffff88006da94bf8 ffff88006da94bf8 0000000000000000 + ffff88003ab31fb0 ffff88003aabd438 ffff88003ab31ff8 ffff88006430fd90 + ffff88003ab32f9c ffffed0007557a87 1ffff1000db6cf78 ffff88003ab32078 + Call Trace: + [<ffffffff8127cf91>] process_one_work+0x8f1/0x17a0 kernel/workqueue.c:2030 + [<ffffffff8127df14>] worker_thread+0xd4/0x1180 kernel/workqueue.c:2162 + [<ffffffff8128faaf>] kthread+0x1cf/0x270 drivers/block/aoe/aoecmd.c:1302 + [<ffffffff852a7c2f>] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:468 + Code: Bad RIP value. 
+ RIP [< (null)>] (null) + RSP <ffff88006db67b50> + CR2: 0000000000000000 + ---[ end trace a587f8947e54d6ea ]--- + +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: Peter Hurley <peter@hurleysoftware.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +[lizf: Backported to 3.4: adjust context] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/tty/tty_buffer.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/tty/tty_buffer.c ++++ b/drivers/tty/tty_buffer.c +@@ -443,7 +443,8 @@ static void flush_to_ldisc(struct work_s + flag_buf = head->flag_buf_ptr + head->read; + head->read += count; + spin_unlock_irqrestore(&tty->buf.lock, flags); +- disc->ops->receive_buf(tty, char_buf, ++ if (disc->ops->receive_buf) ++ disc->ops->receive_buf(tty, char_buf, + flag_buf, count); + spin_lock_irqsave(&tty->buf.lock, flags); + } diff --git a/patches/usb-cp210x-remove-cp2110-id-from-compatibility-list.patch b/patches/usb-cp210x-remove-cp2110-id-from-compatibility-list.patch new file mode 100644 index 0000000..f591529 --- /dev/null +++ b/patches/usb-cp210x-remove-cp2110-id-from-compatibility-list.patch @@ -0,0 +1,27 @@ +From 7c90e610b60cd1ed6abafd806acfaedccbbe52d1 Mon Sep 17 00:00:00 2001 +From: Konstantin Shkolnyy <konstantin.shkolnyy@gmail.com> +Date: Tue, 10 Nov 2015 16:40:13 -0600 +Subject: USB: cp210x: Remove CP2110 ID from compatibility list + +commit 7c90e610b60cd1ed6abafd806acfaedccbbe52d1 upstream. + +CP2110 ID (0x10c4, 0xea80) doesn't belong here because it's a HID +and completely different from CP210x devices. 
+ +Signed-off-by: Konstantin Shkolnyy <konstantin.shkolnyy@gmail.com> +Signed-off-by: Johan Hovold <johan@kernel.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/usb/serial/cp210x.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/usb/serial/cp210x.c ++++ b/drivers/usb/serial/cp210x.c +@@ -138,7 +138,6 @@ static const struct usb_device_id id_tab + { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ + { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ + { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ +- { USB_DEVICE(0x10C4, 0xEA80) }, /* Silicon Labs factory default */ + { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ + { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ + { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ diff --git a/patches/usb-fix-invalid-memory-access-in-hub_activate.patch b/patches/usb-fix-invalid-memory-access-in-hub_activate.patch new file mode 100644 index 0000000..11ff280 --- /dev/null +++ b/patches/usb-fix-invalid-memory-access-in-hub_activate.patch @@ -0,0 +1,88 @@ +From e50293ef9775c5f1cf3fcc093037dd6a8c5684ea Mon Sep 17 00:00:00 2001 +From: Alan Stern <stern@rowland.harvard.edu> +Date: Wed, 16 Dec 2015 13:32:38 -0500 +Subject: USB: fix invalid memory access in hub_activate() + +commit e50293ef9775c5f1cf3fcc093037dd6a8c5684ea upstream. + +Commit 8520f38099cc ("USB: change hub initialization sleeps to +delayed_work") changed the hub_activate() routine to make part of it +run in a workqueue. However, the commit failed to take a reference to +the usb_hub structure or to lock the hub interface while doing so. As +a result, if a hub is plugged in and quickly unplugged before the work +routine can run, the routine will try to access memory that has been +deallocated. Or, if the hub is unplugged while the routine is +running, the memory may be deallocated while it is in active use. 
+ +This patch fixes the problem by taking a reference to the usb_hub at +the start of hub_activate() and releasing it at the end (when the work +is finished), and by locking the hub interface while the work routine +is running. It also adds a check at the start of the routine to see +if the hub has already been disconnected, in which nothing should be +done. + +Signed-off-by: Alan Stern <stern@rowland.harvard.edu> +Reported-by: Alexandru Cornea <alexandru.cornea@intel.com> +Tested-by: Alexandru Cornea <alexandru.cornea@intel.com> +Fixes: 8520f38099cc ("USB: change hub initialization sleeps to delayed_work") +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +[lizf: Backported to 3.4: add forward declaration of hub_release()] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/usb/core/hub.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -156,6 +156,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rws + #define HUB_DEBOUNCE_STABLE 100 + + ++static void hub_release(struct kref *kref); + static int usb_reset_and_verify_device(struct usb_device *udev); + + static inline char *portspeed(struct usb_hub *hub, int portstatus) +@@ -797,10 +798,20 @@ static void hub_activate(struct usb_hub + unsigned delay; + + /* Continue a partial initialization */ +- if (type == HUB_INIT2) +- goto init2; +- if (type == HUB_INIT3) ++ if (type == HUB_INIT2 || type == HUB_INIT3) { ++ device_lock(hub->intfdev); ++ ++ /* Was the hub disconnected while we were waiting? 
*/ ++ if (hub->disconnected) { ++ device_unlock(hub->intfdev); ++ kref_put(&hub->kref, hub_release); ++ return; ++ } ++ if (type == HUB_INIT2) ++ goto init2; + goto init3; ++ } ++ kref_get(&hub->kref); + + /* The superspeed hub except for root hub has to use Hub Depth + * value as an offset into the route string to locate the bits +@@ -990,6 +1001,7 @@ static void hub_activate(struct usb_hub + PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3); + schedule_delayed_work(&hub->init_work, + msecs_to_jiffies(delay)); ++ device_unlock(hub->intfdev); + return; /* Continues at init3: below */ + } else { + msleep(delay); +@@ -1010,6 +1022,11 @@ static void hub_activate(struct usb_hub + /* Allow autosuspend if it was suppressed */ + if (type <= HUB_INIT3) + usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); ++ ++ if (type == HUB_INIT2 || type == HUB_INIT3) ++ device_unlock(hub->intfdev); ++ ++ kref_put(&hub->kref, hub_release); + } + + /* Implement the continuations for the delays above */ diff --git a/patches/usb-ipaq.c-fix-a-timeout-loop.patch b/patches/usb-ipaq.c-fix-a-timeout-loop.patch new file mode 100644 index 0000000..005c10c --- /dev/null +++ b/patches/usb-ipaq.c-fix-a-timeout-loop.patch @@ -0,0 +1,31 @@ +From abdc9a3b4bac97add99e1d77dc6d28623afe682b Mon Sep 17 00:00:00 2001 +From: Dan Carpenter <dan.carpenter@oracle.com> +Date: Wed, 16 Dec 2015 14:06:37 +0300 +Subject: USB: ipaq.c: fix a timeout loop + +commit abdc9a3b4bac97add99e1d77dc6d28623afe682b upstream. + +The code expects the loop to end with "retries" set to zero but, because +it is a post-op, it will end set to -1. I have fixed this by moving the +decrement inside the loop. 
+ +Fixes: 014aa2a3c32e ('USB: ipaq: minor ipaq_open() cleanup.') +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/usb/serial/ipaq.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/usb/serial/ipaq.c ++++ b/drivers/usb/serial/ipaq.c +@@ -550,7 +550,8 @@ static int ipaq_open(struct tty_struct * + * through. Since this has a reasonably high failure rate, we retry + * several times. + */ +- while (retries--) { ++ while (retries) { ++ retries--; + result = usb_control_msg(serial->dev, + usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21, + 0x1, 0, NULL, 0, 100); diff --git a/patches/usb-whci-hcd-add-check-for-dma-mapping-error.patch b/patches/usb-whci-hcd-add-check-for-dma-mapping-error.patch new file mode 100644 index 0000000..beaaa48 --- /dev/null +++ b/patches/usb-whci-hcd-add-check-for-dma-mapping-error.patch @@ -0,0 +1,31 @@ +From f9fa1887dcf26bd346665a6ae3d3f53dec54cba1 Mon Sep 17 00:00:00 2001 +From: Alexey Khoroshilov <khoroshilov@ispras.ru> +Date: Sat, 21 Nov 2015 00:36:44 +0300 +Subject: USB: whci-hcd: add check for dma mapping error + +commit f9fa1887dcf26bd346665a6ae3d3f53dec54cba1 upstream. + +qset_fill_page_list() does not check for dma mapping errors. + +Found by Linux Driver Verification project (linuxtesting.org). 
+ +Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/usb/host/whci/qset.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/usb/host/whci/qset.c ++++ b/drivers/usb/host/whci/qset.c +@@ -377,6 +377,10 @@ static int qset_fill_page_list(struct wh + if (std->pl_virt == NULL) + return -ENOMEM; + std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE); ++ if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) { ++ kfree(std->pl_virt); ++ return -EFAULT; ++ } + + for (p = 0; p < std->num_pointers; p++) { + std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr); diff --git a/patches/usb-xhci-fix-config-fail-of-fs-hub-behind-a-hs-hub-with-mtt.patch b/patches/usb-xhci-fix-config-fail-of-fs-hub-behind-a-hs-hub-with-mtt.patch new file mode 100644 index 0000000..e51d704 --- /dev/null +++ b/patches/usb-xhci-fix-config-fail-of-fs-hub-behind-a-hs-hub-with-mtt.patch @@ -0,0 +1,45 @@ +From 096b110a3dd3c868e4610937c80d2e3f3357c1a9 Mon Sep 17 00:00:00 2001 +From: Chunfeng Yun <chunfeng.yun@mediatek.com> +Date: Fri, 4 Dec 2015 15:53:43 +0200 +Subject: usb: xhci: fix config fail of FS hub behind a HS hub with MTT + +commit 096b110a3dd3c868e4610937c80d2e3f3357c1a9 upstream. 
+ +if a full speed hub connects to a high speed hub which +supports MTT, the MTT field of its slot context will be set +to 1 when xHCI driver setups an xHCI virtual device in +xhci_setup_addressable_virt_dev(); once usb core fetch its +hub descriptor, and need to update the xHC's internal data +structures for the device, the HUB field of its slot context +will be set to 1 too, meanwhile MTT is also set before, +this will cause configure endpoint command fail, so in the +case, we should clear MTT to 0 for full speed hub according +to section 6.2.2 + +Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com> +Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/usb/host/xhci.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -4123,8 +4123,16 @@ int xhci_update_hub_device(struct usb_hc + ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG); + slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx); + slot_ctx->dev_info |= cpu_to_le32(DEV_HUB); ++ /* ++ * refer to section 6.2.2: MTT should be 0 for full speed hub, ++ * but it may be already set to 1 when setup an xHCI virtual ++ * device, so clear it anyway. ++ */ + if (tt->multi) + slot_ctx->dev_info |= cpu_to_le32(DEV_MTT); ++ else if (hdev->speed == USB_SPEED_FULL) ++ slot_ctx->dev_info &= cpu_to_le32(~DEV_MTT); ++ + if (xhci->hci_version > 0x95) { + xhci_dbg(xhci, "xHCI version %x needs hub " + "TT think time and number of ports\n", diff --git a/patches/vgaarb-fix-signal-handling-in-vga_get.patch b/patches/vgaarb-fix-signal-handling-in-vga_get.patch new file mode 100644 index 0000000..35ff3bc --- /dev/null +++ b/patches/vgaarb-fix-signal-handling-in-vga_get.patch @@ -0,0 +1,40 @@ +From 9f5bd30818c42c6c36a51f93b4df75a2ea2bd85e Mon Sep 17 00:00:00 2001 +From: "Kirill A. 
 Shutemov" <kirill@shutemov.name> +Date: Mon, 30 Nov 2015 04:17:31 +0200 +Subject: vgaarb: fix signal handling in vga_get() + +commit 9f5bd30818c42c6c36a51f93b4df75a2ea2bd85e upstream. + +There are few defects in vga_get() related to signal handling: + + - we shouldn't check for pending signals for TASK_UNINTERRUPTIBLE + case; + + - if we found pending signal we must remove ourself from wait queue + and change task state back to running; + + - -ERESTARTSYS is more appropriate, I guess. + +Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name> +Reviewed-by: David Herrmann <dh.herrmann@gmail.com> +Signed-off-by: Dave Airlie <airlied@redhat.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/gpu/vga/vgaarb.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/vga/vgaarb.c ++++ b/drivers/gpu/vga/vgaarb.c +@@ -381,8 +381,10 @@ int vga_get(struct pci_dev *pdev, unsign + set_current_state(interruptible ? + TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); +- if (signal_pending(current)) { +- rc = -EINTR; ++ if (interruptible && signal_pending(current)) { ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(&vga_wait_queue, &wait); ++ rc = -ERESTARTSYS; + break; + } + schedule(); diff --git a/patches/xen-add-ring_copy_request.patch b/patches/xen-add-ring_copy_request.patch new file mode 100644 index 0000000..e3d8b1d --- /dev/null +++ b/patches/xen-add-ring_copy_request.patch @@ -0,0 +1,52 @@ +From 454d5d882c7e412b840e3c99010fe81a9862f6fb Mon Sep 17 00:00:00 2001 +From: David Vrabel <david.vrabel@citrix.com> +Date: Fri, 30 Oct 2015 14:58:08 +0000 +Subject: xen: Add RING_COPY_REQUEST() + +commit 454d5d882c7e412b840e3c99010fe81a9862f6fb upstream. + +Using RING_GET_REQUEST() on a shared ring is easy to use incorrectly +(i.e., by not considering that the other end may alter the data in the +shared ring while it is being inspected). Safe usage of a request +generally requires taking a local copy. 
+ +Provide a RING_COPY_REQUEST() macro to use instead of +RING_GET_REQUEST() and an open-coded memcpy(). This takes care of +ensuring that the copy is done correctly regardless of any possible +compiler optimizations. + +Use a volatile source to prevent the compiler from reordering or +omitting the copy. + +This is part of XSA155. + +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + include/xen/interface/io/ring.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/include/xen/interface/io/ring.h ++++ b/include/xen/interface/io/ring.h +@@ -181,6 +181,20 @@ struct __name##_back_ring { \ + #define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + ++/* ++ * Get a local copy of a request. ++ * ++ * Use this in preference to RING_GET_REQUEST() so all processing is ++ * done on a local copy that cannot be modified by the other end. ++ * ++ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this ++ * to be ineffective where _req is a struct which consists of only bitfields. ++ */ ++#define RING_COPY_REQUEST(_r, _idx, _req) do { \ ++ /* Use volatile to force the copy into _req. 
*/ \ ++ *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ ++} while (0) ++ + #define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + diff --git a/patches/xen-blkback-only-read-request-operation-from-shared-ring-once.patch b/patches/xen-blkback-only-read-request-operation-from-shared-ring-once.patch new file mode 100644 index 0000000..3523525 --- /dev/null +++ b/patches/xen-blkback-only-read-request-operation-from-shared-ring-once.patch @@ -0,0 +1,53 @@ +From 1f13d75ccb806260079e0679d55d9253e370ec8a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 3 Nov 2015 16:34:09 +0000 +Subject: xen-blkback: only read request operation from shared ring once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 1f13d75ccb806260079e0679d55d9253e370ec8a upstream. + +A compiler may load a switch statement value multiple times, which could +be bad when the value is in memory shared with the frontend. + +When converting a non-native request to a native one, ensure that +src->operation is only loaded once by using READ_ONCE(). + +This is part of XSA155. 
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +[lizf: Backported to 3.4: + - adjust context + - call ACCESS_ONCE instead of READ_ONCE] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/block/xen-blkback/common.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/block/xen-blkback/common.h ++++ b/drivers/block/xen-blkback/common.h +@@ -256,8 +256,8 @@ static inline void blkif_get_x86_32_req( + struct blkif_x86_32_request *src) + { + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; +- dst->operation = src->operation; +- switch (src->operation) { ++ dst->operation = ACCESS_ONCE(src->operation); ++ switch (dst->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: +@@ -292,8 +292,8 @@ static inline void blkif_get_x86_64_req( + struct blkif_x86_64_request *src) + { + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; +- dst->operation = src->operation; +- switch (src->operation) { ++ dst->operation = ACCESS_ONCE(src->operation); ++ switch (dst->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: diff --git a/patches/xen-netback-don-t-use-last-request-to-determine-minimum-tx-credit.patch b/patches/xen-netback-don-t-use-last-request-to-determine-minimum-tx-credit.patch new file mode 100644 index 0000000..00802d8 --- /dev/null +++ b/patches/xen-netback-don-t-use-last-request-to-determine-minimum-tx-credit.patch @@ -0,0 +1,37 @@ +From 0f589967a73f1f30ab4ac4dd9ce0bb399b4d6357 Mon Sep 17 00:00:00 2001 +From: David Vrabel <david.vrabel@citrix.com> +Date: Fri, 30 Oct 2015 15:16:01 +0000 +Subject: xen-netback: don't use last request to determine minimum Tx credit + +commit 0f589967a73f1f30ab4ac4dd9ce0bb399b4d6357 upstream. 
+ +The last from guest transmitted request gives no indication about the +minimum amount of credit that the guest might need to send a packet +since the last packet might have been a small one. + +Instead allow for the worst case 128 KiB packet. + +This is part of XSA155. + +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +[lizf: Backported to 3.4: s/queue/vif/g] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/net/xen-netback/netback.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -856,9 +856,7 @@ static void tx_add_credit(struct xenvif + * Allow a burst big enough to transmit a jumbo packet of up to 128kB. + * Otherwise the interface can seize up due to insufficient credit. + */ +- max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; +- max_burst = min(max_burst, 131072UL); +- max_burst = max(max_burst, vif->credit_bytes); ++ max_burst = max(131072UL, vif->credit_bytes); + + /* Take care that adding a new chunk of credit doesn't wrap to zero. */ + max_credit = vif->remaining_credit + vif->credit_bytes; diff --git a/patches/xen-netback-use-ring_copy_request-throughout.patch b/patches/xen-netback-use-ring_copy_request-throughout.patch new file mode 100644 index 0000000..64a2364 --- /dev/null +++ b/patches/xen-netback-use-ring_copy_request-throughout.patch @@ -0,0 +1,129 @@ +From 68a33bfd8403e4e22847165d149823a2e0e67c9c Mon Sep 17 00:00:00 2001 +From: David Vrabel <david.vrabel@citrix.com> +Date: Fri, 30 Oct 2015 15:17:06 +0000 +Subject: xen-netback: use RING_COPY_REQUEST() throughout + +commit 68a33bfd8403e4e22847165d149823a2e0e67c9c upstream. + +Instead of open-coding memcpy()s and directly accessing Tx and Rx +requests, use the new RING_COPY_REQUEST() that ensures the local copy +is correct. 
+ +This is more than is strictly necessary for guest Rx requests since +only the id and gref fields are used and it is harmless if the +frontend modifies these. + +This is part of XSA155. + +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +[lizf: Backported to 3.4: + - adjust context + - s/queue/vif/g] +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/net/xen-netback/netback.c | 30 ++++++++++++++---------------- + 1 file changed, 14 insertions(+), 16 deletions(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -398,17 +398,17 @@ static struct netbk_rx_meta *get_next_rx + struct netrx_pending_operations *npo) + { + struct netbk_rx_meta *meta; +- struct xen_netif_rx_request *req; ++ struct xen_netif_rx_request req; + +- req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); ++ RING_COPY_REQUEST(&vif->rx, vif->rx.req_cons++, &req); + + meta = npo->meta + npo->meta_prod++; + meta->gso_size = 0; + meta->size = 0; +- meta->id = req->id; ++ meta->id = req.id; + + npo->copy_off = 0; +- npo->copy_gref = req->gref; ++ npo->copy_gref = req.gref; + + return meta; + } +@@ -510,7 +510,7 @@ static int netbk_gop_skb(struct sk_buff + struct xenvif *vif = netdev_priv(skb->dev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; +- struct xen_netif_rx_request *req; ++ struct xen_netif_rx_request req; + struct netbk_rx_meta *meta; + unsigned char *data; + int head = 1; +@@ -520,14 +520,14 @@ static int netbk_gop_skb(struct sk_buff + + /* Set up a GSO prefix descriptor, if necessary */ + if (skb_shinfo(skb)->gso_size && vif->gso_prefix) { +- req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); ++ RING_COPY_REQUEST(&vif->rx, vif->rx.req_cons++, &req); + meta = npo->meta + npo->meta_prod++; + meta->gso_size = skb_shinfo(skb)->gso_size; + meta->size = 0; +- meta->id = req->id; ++ meta->id = req.id; + } + +- req = 
RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); ++ RING_COPY_REQUEST(&vif->rx, vif->rx.req_cons++, &req); + meta = npo->meta + npo->meta_prod++; + + if (!vif->gso_prefix) +@@ -536,9 +536,9 @@ static int netbk_gop_skb(struct sk_buff + meta->gso_size = 0; + + meta->size = 0; +- meta->id = req->id; ++ meta->id = req.id; + npo->copy_off = 0; +- npo->copy_gref = req->gref; ++ npo->copy_gref = req.gref; + + data = skb->data; + while (data < skb_tail_pointer(skb)) { +@@ -882,7 +882,7 @@ static void netbk_tx_err(struct xenvif * + make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); + if (cons == end) + break; +- txp = RING_GET_REQUEST(&vif->tx, cons++); ++ RING_COPY_REQUEST(&vif->tx, cons++, txp); + } while (1); + vif->tx.req_cons = cons; + xen_netbk_check_rx_xenvif(vif); +@@ -943,8 +943,7 @@ static int netbk_count_requests(struct x + drop_err = -E2BIG; + } + +- memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), +- sizeof(*txp)); ++ RING_COPY_REQUEST(&vif->tx, cons + slots, txp); + + /* If the guest submitted a frame >= 64 KiB then + * first->size overflowed and following slots will +@@ -1226,8 +1225,7 @@ static int xen_netbk_get_extras(struct x + return -EBADR; + } + +- memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), +- sizeof(extra)); ++ RING_COPY_REQUEST(&vif->tx, cons, &extra); + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + vif->tx.req_cons = ++cons; +@@ -1422,7 +1420,7 @@ static unsigned xen_netbk_tx_build_gops( + + idx = vif->tx.req_cons; + rmb(); /* Ensure that we see the request before we copy it. */ +- memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); ++ RING_COPY_REQUEST(&vif->tx, idx, &txreq); + + /* Credit-based scheduling. 
*/ + if (txreq.size > vif->remaining_credit && diff --git a/patches/xen-pciback-do-not-install-an-irq-handler-for-msi-interrupts.patch b/patches/xen-pciback-do-not-install-an-irq-handler-for-msi-interrupts.patch new file mode 100644 index 0000000..4ed0c3c --- /dev/null +++ b/patches/xen-pciback-do-not-install-an-irq-handler-for-msi-interrupts.patch @@ -0,0 +1,77 @@ +From a396f3a210c3a61e94d6b87ec05a75d0be2a60d0 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 2 Nov 2015 17:24:08 -0500 +Subject: xen/pciback: Do not install an IRQ handler for MSI interrupts. + +commit a396f3a210c3a61e94d6b87ec05a75d0be2a60d0 upstream. + +Otherwise an guest can subvert the generic MSI code to trigger +an BUG_ON condition during MSI interrupt freeing: + + for (i = 0; i < entry->nvec_used; i++) + BUG_ON(irq_has_action(entry->irq + i)); + +Xen PCI backed installs an IRQ handler (request_irq) for +the dev->irq whenever the guest writes PCI_COMMAND_MEMORY +(or PCI_COMMAND_IO) to the PCI_COMMAND register. This is +done in case the device has legacy interrupts the GSI line +is shared by the backend devices. + +To subvert the backend the guest needs to make the backend +to change the dev->irq from the GSI to the MSI interrupt line, +make the backend allocate an interrupt handler, and then command +the backend to free the MSI interrupt and hit the BUG_ON. + +Since the backend only calls 'request_irq' when the guest +writes to the PCI_COMMAND register the guest needs to call +XEN_PCI_OP_enable_msi before any other operation. This will +cause the generic MSI code to setup an MSI entry and +populate dev->irq with the new PIRQ value. + +Then the guest can write to PCI_COMMAND PCI_COMMAND_MEMORY +and cause the backend to setup an IRQ handler for dev->irq +(which instead of the GSI value has the MSI pirq). See +'xen_pcibk_control_isr'. 
+ +Then the guest disables the MSI: XEN_PCI_OP_disable_msi +which ends up triggering the BUG_ON condition in 'free_msi_irqs' +as there is an IRQ handler for the entry->irq (dev->irq). + +Note that this cannot be done using MSI-X as the generic +code does not over-write dev->irq with the MSI-X PIRQ values. + +The patch inhibits setting up the IRQ handler if MSI or +MSI-X (for symmetry reasons) code had been called successfully. + +P.S. +Xen PCIBack when it sets up the device for the guest consumption +ends up writting 0 to the PCI_COMMAND (see xen_pcibk_reset_device). +XSA-120 addendum patch removed that - however when upstreaming said +addendum we found that it caused issues with qemu upstream. That +has now been fixed in qemu upstream. + +This is part of XSA-157 + +Reviewed-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -69,6 +69,13 @@ static void xen_pcibk_control_isr(struct + enable ? "enable" : "disable"); + + if (enable) { ++ /* ++ * The MSI or MSI-X should not have an IRQ handler. Otherwise ++ * if the guest terminates we BUG_ON in free_msi_irqs. 
++ */ ++ if (dev->msi_enabled || dev->msix_enabled) ++ goto out; ++ + rc = request_irq(dev_data->irq, + xen_pcibk_guest_interrupt, IRQF_SHARED, + dev_data->irq_name, dev); diff --git a/patches/xen-pciback-don-t-allow-msi-x-ops-if-pci_command_memory-is-not-set.patch b/patches/xen-pciback-don-t-allow-msi-x-ops-if-pci_command_memory-is-not-set.patch new file mode 100644 index 0000000..fea4fe3 --- /dev/null +++ b/patches/xen-pciback-don-t-allow-msi-x-ops-if-pci_command_memory-is-not-set.patch @@ -0,0 +1,60 @@ +From 408fb0e5aa7fda0059db282ff58c3b2a4278baa0 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 2 Nov 2015 18:13:27 -0500 +Subject: xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not set. + +commit 408fb0e5aa7fda0059db282ff58c3b2a4278baa0 upstream. + +commit f598282f51 ("PCI: Fix the NIU MSI-X problem in a better way") +teaches us that dealing with MSI-X can be troublesome. + +Further checks in the MSI-X architecture shows that if the +PCI_COMMAND_MEMORY bit is turned of in the PCI_COMMAND we +may not be able to access the BAR (since they are memory regions). + +Since the MSI-X tables are located in there.. that can lead +to us causing PCIe errors. Inhibit us performing any +operation on the MSI-X unless the MEMORY bit is set. + +Note that Xen hypervisor with: +"x86/MSI-X: access MSI-X table only after having enabled MSI-X" +will return: +xen_pciback: 0000:0a:00.1: error -6 enabling MSI-X for guest 3! + +When the generic MSI code tries to setup the PIRQ without +MEMORY bit set. Which means with later versions of Xen +(4.6) this patch is not neccessary. 
+ +This is part of XSA-157 + +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback_ops.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -211,6 +211,7 @@ int xen_pcibk_enable_msix(struct xen_pci + struct xen_pcibk_dev_data *dev_data; + int i, result; + struct msix_entry *entries; ++ u16 cmd; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", +@@ -222,7 +223,12 @@ int xen_pcibk_enable_msix(struct xen_pci + if (dev->msix_enabled) + return -EALREADY; + +- if (dev->msi_enabled) ++ /* ++ * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able ++ * to access the BARs where the MSI-X entries reside. ++ */ ++ pci_read_config_word(dev, PCI_COMMAND, &cmd); ++ if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) + return -ENXIO; + + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); diff --git a/patches/xen-pciback-for-xen_pci_op_disable_msi-x-only-disable-if-device-has-msi-x-enabled.patch b/patches/xen-pciback-for-xen_pci_op_disable_msi-x-only-disable-if-device-has-msi-x-enabled.patch new file mode 100644 index 0000000..b505045 --- /dev/null +++ b/patches/xen-pciback-for-xen_pci_op_disable_msi-x-only-disable-if-device-has-msi-x-enabled.patch @@ -0,0 +1,102 @@ +From 7cfb905b9638982862f0331b36ccaaca5d383b49 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Wed, 1 Apr 2015 10:49:47 -0400 +Subject: xen/pciback: For XEN_PCI_OP_disable_msi[|x] only disable if device + has MSI(X) enabled. + +commit 7cfb905b9638982862f0331b36ccaaca5d383b49 upstream. + +Otherwise just continue on, returning the same values as +previously (return of 0, and op->result has the PIRQ value). + +This does not change the behavior of XEN_PCI_OP_disable_msi[|x]. 
+ +The pci_disable_msi or pci_disable_msix have the checks for +msi_enabled or msix_enabled so they will error out immediately. + +However the guest can still call these operations and cause +us to disable the 'ack_intr'. That means the backend IRQ handler +for the legacy interrupt will not respond to interrupts anymore. + +This will lead to (if the device is causing an interrupt storm) +for the Linux generic code to disable the interrupt line. + +Naturally this will only happen if the device in question +is plugged in on the motherboard on shared level interrupt GSI. + +This is part of XSA-157 + +Reviewed-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback_ops.c | 33 ++++++++++++++++++++------------- + 1 file changed, 20 insertions(+), 13 deletions(-) + +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -184,20 +184,23 @@ static + int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { +- struct xen_pcibk_dev_data *dev_data; +- + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", + pci_name(dev)); +- pci_disable_msi(dev); + ++ if (dev->msi_enabled) { ++ struct xen_pcibk_dev_data *dev_data; ++ ++ pci_disable_msi(dev); ++ ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 1; ++ } + op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), + op->value); +- dev_data = pci_get_drvdata(dev); +- if (dev_data) +- dev_data->ack_intr = 1; + return 0; + } + +@@ -263,23 +266,27 @@ static + int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { +- struct xen_pcibk_dev_data *dev_data; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", + pci_name(dev)); +- pci_disable_msix(dev); + ++ if (dev->msix_enabled) { ++ struct xen_pcibk_dev_data *dev_data; ++ ++ pci_disable_msix(dev); ++ ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 1; ++ } + /* + * SR-IOV devices (which don't have any legacy IRQ) have + * an undefined IRQ value of zero. + */ + op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) +- printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), +- op->value); +- dev_data = pci_get_drvdata(dev); +- if (dev_data) +- dev_data->ack_intr = 1; ++ printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", ++ pci_name(dev), op->value); + return 0; + } + #endif diff --git a/patches/xen-pciback-return-error-on-xen_pci_op_enable_msi-when-device-has-msi-or-msi-x-enabled.patch b/patches/xen-pciback-return-error-on-xen_pci_op_enable_msi-when-device-has-msi-or-msi-x-enabled.patch new file mode 100644 index 0000000..f0179f5 --- /dev/null +++ b/patches/xen-pciback-return-error-on-xen_pci_op_enable_msi-when-device-has-msi-or-msi-x-enabled.patch @@ -0,0 +1,58 @@ +From 56441f3c8e5bd45aab10dd9f8c505dd4bec03b0d Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Fri, 3 Apr 2015 11:08:22 -0400 +Subject: xen/pciback: Return error on XEN_PCI_OP_enable_msi when device has + MSI or MSI-X enabled + +commit 56441f3c8e5bd45aab10dd9f8c505dd4bec03b0d upstream. 
+ +The guest sequence of: + + a) XEN_PCI_OP_enable_msi + b) XEN_PCI_OP_enable_msi + c) XEN_PCI_OP_disable_msi + +results in hitting a BUG_ON condition in the msi.c code. + +The MSI code uses a dev->msi_list to which it adds MSI entries. +Under the above conditions a BUG_ON() can be hit. The device +passed in the guest MUST have MSI capability. + +The a) adds the entry to the dev->msi_list and sets msi_enabled. +The b) adds a second entry but adding in to SysFS fails (duplicate entry) +and deletes all of the entries from msi_list and returns (with msi_enabled +is still set). c) pci_disable_msi passes the msi_enabled checks and hits: + +BUG_ON(list_empty(dev_to_msi_list(&dev->dev))); + +and blows up. + +The patch adds a simple check in the XEN_PCI_OP_enable_msi to guard +against that. The check for msix_enabled is not strictly necessary. + +This is part of XSA-157. + +Reviewed-by: David Vrabel <david.vrabel@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback_ops.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -143,7 +143,12 @@ int xen_pcibk_enable_msi(struct xen_pcib + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); + +- status = pci_enable_msi(dev); ++ if (dev->msi_enabled) ++ status = -EALREADY; ++ else if (dev->msix_enabled) ++ status = -ENXIO; ++ else ++ status = pci_enable_msi(dev); + + if (status) { + pr_warn_ratelimited(DRV_NAME ": %s: error enabling MSI for guest %u: err %d\n", diff --git a/patches/xen-pciback-return-error-on-xen_pci_op_enable_msix-when-device-has-msi-or-msi-x-enabled.patch b/patches/xen-pciback-return-error-on-xen_pci_op_enable_msix-when-device-has-msi-or-msi-x-enabled.patch new file mode 100644 index 0000000..e0e6261 --- 
/dev/null +++ b/patches/xen-pciback-return-error-on-xen_pci_op_enable_msix-when-device-has-msi-or-msi-x-enabled.patch @@ -0,0 +1,60 @@ +From 5e0ce1455c09dd61d029b8ad45d82e1ac0b6c4c9 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 2 Nov 2015 18:07:44 -0500 +Subject: xen/pciback: Return error on XEN_PCI_OP_enable_msix when device has + MSI or MSI-X enabled + +commit 5e0ce1455c09dd61d029b8ad45d82e1ac0b6c4c9 upstream. + +The guest sequence of: + + a) XEN_PCI_OP_enable_msix + b) XEN_PCI_OP_enable_msix + +results in hitting a NULL pointer due to using freed pointers. + +The device passed in the guest MUST have MSI-X capability. + +The a) constructs a SysFS representation of MSI and MSI groups. +The b) adds a second set of them but adding in to SysFS fails (duplicate entry). +'populate_msi_sysfs' frees the newly allocated msi_irq_groups (note that +in a) pdev->msi_irq_groups is still set) and also frees ALL of the +MSI-X entries of the device (the ones allocated in step a) and b)). + +The unwind code: 'free_msi_irqs' deletes all the entries and tries to +delete the pdev->msi_irq_groups (which hasn't been set to NULL). +However the pointers in the SysFS are already freed and we hit a +NULL pointer further on when 'strlen' is attempted on a freed pointer. + +The patch adds a simple check in the XEN_PCI_OP_enable_msix to guard +against that. The check for msi_enabled is not strictly necessary. 
+ +This is part of XSA-157 + +Reviewed-by: David Vrabel <david.vrabel@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -205,9 +205,16 @@ int xen_pcibk_enable_msix(struct xen_pci + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", + pci_name(dev)); ++ + if (op->value > SH_INFO_MAX_VEC) + return -EINVAL; + ++ if (dev->msix_enabled) ++ return -EALREADY; ++ ++ if (dev->msi_enabled) ++ return -ENXIO; ++ + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) + return -ENOMEM; diff --git a/patches/xen-pciback-save-xen_pci_op-commands-before-processing-it.patch b/patches/xen-pciback-save-xen_pci_op-commands-before-processing-it.patch new file mode 100644 index 0000000..99f00fc --- /dev/null +++ b/patches/xen-pciback-save-xen_pci_op-commands-before-processing-it.patch @@ -0,0 +1,75 @@ +From 8135cf8b092723dbfcc611fe6fdcb3a36c9951c5 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Mon, 16 Nov 2015 12:40:48 -0500 +Subject: xen/pciback: Save xen_pci_op commands before processing it + +commit 8135cf8b092723dbfcc611fe6fdcb3a36c9951c5 upstream. + +Double fetch vulnerabilities happen when a variable is +fetched twice from shared memory but a security check is only +performed the first time. + +The xen_pcibk_do_op function performs a switch statement on the op->cmd +value which is stored in shared memory. Interestingly this can result +in a double fetch vulnerability depending on the performed compiler +optimization. + +This patch fixes it by saving the xen_pci_op command before +processing it. 
We also use 'barrier' to make sure that the +compiler does not perform any optimization. + +This is part of XSA155. + +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Jan Beulich <JBeulich@suse.com> +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Zefan Li <lizefan@huawei.com> +--- + drivers/xen/xen-pciback/pciback.h | 1 + + drivers/xen/xen-pciback/pciback_ops.c | 15 ++++++++++++++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +--- a/drivers/xen/xen-pciback/pciback.h ++++ b/drivers/xen/xen-pciback/pciback.h +@@ -37,6 +37,7 @@ struct xen_pcibk_device { + struct xen_pci_sharedinfo *sh_info; + unsigned long flags; + struct work_struct op_work; ++ struct xen_pci_op op; + }; + + struct xen_pcibk_dev_data { +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -297,9 +297,11 @@ void xen_pcibk_do_op(struct work_struct + container_of(data, struct xen_pcibk_device, op_work); + struct pci_dev *dev; + struct xen_pcibk_dev_data *dev_data = NULL; +- struct xen_pci_op *op = &pdev->sh_info->op; ++ struct xen_pci_op *op = &pdev->op; + int test_intx = 0; + ++ *op = pdev->sh_info->op; ++ barrier(); + dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); + + if (dev == NULL) +@@ -341,6 +343,17 @@ void xen_pcibk_do_op(struct work_struct + if ((dev_data->enable_intx != test_intx)) + xen_pcibk_control_isr(dev, 0 /* no reset */); + } ++ pdev->sh_info->op.err = op->err; ++ pdev->sh_info->op.value = op->value; ++#ifdef CONFIG_PCI_MSI ++ if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { ++ unsigned int i; ++ ++ for (i = 0; i < op->value; i++) ++ pdev->sh_info->op.msix_entries[i].vector = ++ op->msix_entries[i].vector; ++ } ++#endif + /* Tell the driver domain that we're done. */ + wmb(); + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); |