diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-05-18 17:40:02 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-05-18 17:40:02 -0700 |
commit | e157379d8694853217e7097a0379d7e7daaf6dcd (patch) | |
tree | f9e53b1688fba09af9bc0f822f292cf2644281af | |
parent | b8704b87f415a7de55cce66a6643fbc0a558c7e9 (diff) | |
download | stable-queue-e157379d8694853217e7097a0379d7e7daaf6dcd.tar.gz |
.38 patches
23 files changed, 1796 insertions, 0 deletions
diff --git a/queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch b/queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch new file mode 100644 index 0000000000..c1a6c5f885 --- /dev/null +++ b/queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch @@ -0,0 +1,98 @@ +From 02e352287a40bd456eb78df705bf888bc3161d3f Mon Sep 17 00:00:00 2001 +From: Tejun Heo <tj@kernel.org> +Date: Fri, 29 Apr 2011 10:15:20 +0200 +Subject: block: rescan partitions on invalidated devices on -ENOMEDIA too + +From: Tejun Heo <tj@kernel.org> + +commit 02e352287a40bd456eb78df705bf888bc3161d3f upstream. + +__blkdev_get() doesn't rescan partitions if disk->fops->open() fails, +which leads to ghost partition devices lingering after medium removal +is known to both the kernel and userland. The behavior also creates a +subtle inconsistency where O_NONBLOCK open, which doesn't fail even if +there's no medium, clears the ghost partitions, which is exploited to +work around the problem from userland. + +Fix it by updating __blkdev_get() to issue partition rescan after +-ENOMEDIUM too. + +This was reported in the following bz. 
+ + https://bugzilla.kernel.org/show_bug.cgi?id=13029 + +Stable: 2.6.38 + +Signed-off-by: Tejun Heo <tj@kernel.org> +Reported-by: David Zeuthen <zeuthen@gmail.com> +Reported-by: Martin Pitt <martin.pitt@ubuntu.com> +Reported-by: Kay Sievers <kay.sievers@vrfy.org> +Tested-by: Kay Sievers <kay.sievers@vrfy.org> +Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> +Signed-off-by: Jens Axboe <jaxboe@fusionio.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/block_dev.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +--- a/fs/block_dev.c ++++ b/fs/block_dev.c +@@ -1099,6 +1099,7 @@ static int __blkdev_get(struct block_dev + if (!bdev->bd_part) + goto out_clear; + ++ ret = 0; + if (disk->fops->open) { + ret = disk->fops->open(bdev, mode); + if (ret == -ERESTARTSYS) { +@@ -1114,9 +1115,18 @@ static int __blkdev_get(struct block_dev + mutex_unlock(&bdev->bd_mutex); + goto restart; + } +- if (ret) +- goto out_clear; + } ++ /* ++ * If the device is invalidated, rescan partition ++ * if open succeeded or failed with -ENOMEDIUM. ++ * The latter is necessary to prevent ghost ++ * partitions on a removed medium. 
++ */ ++ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) ++ rescan_partitions(disk, bdev); ++ if (ret) ++ goto out_clear; ++ + if (!bdev->bd_openers) { + bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + bdi = blk_get_backing_dev_info(bdev); +@@ -1124,8 +1134,6 @@ static int __blkdev_get(struct block_dev + bdi = &default_backing_dev_info; + bdev_inode_switch_bdi(bdev->bd_inode, bdi); + } +- if (bdev->bd_invalidated) +- rescan_partitions(disk, bdev); + } else { + struct block_device *whole; + whole = bdget_disk(disk, 0); +@@ -1152,13 +1160,14 @@ static int __blkdev_get(struct block_dev + put_disk(disk); + disk = NULL; + if (bdev->bd_contains == bdev) { +- if (bdev->bd_disk->fops->open) { ++ ret = 0; ++ if (bdev->bd_disk->fops->open) + ret = bdev->bd_disk->fops->open(bdev, mode); +- if (ret) +- goto out_unlock_bdev; +- } +- if (bdev->bd_invalidated) ++ /* the same as first opener case, read comment there */ ++ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) + rescan_partitions(bdev->bd_disk, bdev); ++ if (ret) ++ goto out_unlock_bdev; + } + } + bdev->bd_openers++; diff --git a/queue-2.6.38/cdrom-always-check_disk_change-on-open.patch b/queue-2.6.38/cdrom-always-check_disk_change-on-open.patch new file mode 100644 index 0000000000..ef40be0ddb --- /dev/null +++ b/queue-2.6.38/cdrom-always-check_disk_change-on-open.patch @@ -0,0 +1,64 @@ +From bf2253a6f00e8fea5b026e471e9f0d0a1b3621f2 Mon Sep 17 00:00:00 2001 +From: Tejun Heo <tj@kernel.org> +Date: Fri, 29 Apr 2011 10:15:14 +0200 +Subject: cdrom: always check_disk_change() on open + +From: Tejun Heo <tj@kernel.org> + +commit bf2253a6f00e8fea5b026e471e9f0d0a1b3621f2 upstream. + +cdrom_open() called check_disk_change() after the rest of open path +succeeded which leads to the following bizarre behavior. + +* After media change, if the device opened without O_NONBLOCK, + open_for_data() naturally fails with -ENOMEDIA and + check_disk_change() is never called. 
The media is known to be gone + and the open failure makes it obvious to the userland but device + invalidation never happens. + +* But if the device is opened with O_NONBLOCK, all the checks are + bypassed and cdrom_open() doesn't notice that the media is not there + and check_disk_change() is called and invalidation happens. + +There's nothing to be gained by avoiding calling check_disk_change() +on open failure. Common cases end up calling check_disk_change() +anyway. All we get is inconsistent behavior. + +Fix it by moving check_disk_change() invocation to the top of +cdrom_open() so that it always gets called regardless of how the rest +of open proceeds. + +Stable: 2.6.38 + +Signed-off-by: Tejun Heo <tj@kernel.org> +Reported-by: Amit Shah <amit.shah@redhat.com> +Tested-by: Amit Shah <amit.shah@redhat.com> +Signed-off-by: Jens Axboe <jaxboe@fusionio.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/cdrom/cdrom.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/cdrom/cdrom.c ++++ b/drivers/cdrom/cdrom.c +@@ -986,6 +986,9 @@ int cdrom_open(struct cdrom_device_info + + cdinfo(CD_OPEN, "entering cdrom_open\n"); + ++ /* open is event synchronization point, check events first */ ++ check_disk_change(bdev); ++ + /* if this was a O_NONBLOCK open and we should honor the flags, + * do a quick open without drive/disc integrity checks. */ + cdi->use_count++; +@@ -1012,9 +1015,6 @@ int cdrom_open(struct cdrom_device_info + + cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n", + cdi->name, cdi->use_count); +- /* Do this on open. 
Don't wait for mount, because they might +- not be mounting, but opening with O_NONBLOCK */ +- check_disk_change(bdev); + return 0; + err_release: + if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { diff --git a/queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch b/queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch new file mode 100644 index 0000000000..8d19a91838 --- /dev/null +++ b/queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch @@ -0,0 +1,36 @@ +From 221d1d797202984cb874e3ed9f1388593d34ee22 Mon Sep 17 00:00:00 2001 +From: Jeff Layton <jlayton@redhat.com> +Date: Tue, 17 May 2011 06:40:30 -0400 +Subject: cifs: add fallback in is_path_accessible for old servers + +From: Jeff Layton <jlayton@redhat.com> + +commit 221d1d797202984cb874e3ed9f1388593d34ee22 upstream. + +The is_path_accessible check uses a QPathInfo call, which isn't +supported by ancient win9x era servers. Fall back to an older +SMBQueryInfo call if it fails with the magic error codes. 
+ +Reported-and-Tested-by: Sandro Bonazzola <sandro.bonazzola@gmail.com> +Signed-off-by: Jeff Layton <jlayton@redhat.com> +Signed-off-by: Steve French <sfrench@us.ibm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/cifs/connect.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -2657,6 +2657,11 @@ is_path_accessible(int xid, struct cifsT + 0 /* not legacy */, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); ++ ++ if (rc == -EOPNOTSUPP || rc == -EINVAL) ++ rc = SMBQueryInformation(xid, tcon, full_path, pfile_info, ++ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & ++ CIFS_MOUNT_MAP_SPECIAL_CHR); + kfree(pfile_info); + return rc; + } diff --git a/queue-2.6.38/clocksource-install-completely-before-selecting.patch b/queue-2.6.38/clocksource-install-completely-before-selecting.patch new file mode 100644 index 0000000000..dc867861d7 --- /dev/null +++ b/queue-2.6.38/clocksource-install-completely-before-selecting.patch @@ -0,0 +1,58 @@ +From e05b2efb82596905ebfe88e8612ee81dec9b6592 Mon Sep 17 00:00:00 2001 +From: john stultz <johnstul@us.ibm.com> +Date: Wed, 4 May 2011 18:16:50 -0700 +Subject: clocksource: Install completely before selecting + +From: john stultz <johnstul@us.ibm.com> + +commit e05b2efb82596905ebfe88e8612ee81dec9b6592 upstream. + +Christian Hoffmann reported that the command line clocksource override +with acpi_pm timer fails: + + Kernel command line: <SNIP> clocksource=acpi_pm + hpet clockevent registered + Switching to clocksource hpet + Override clocksource acpi_pm is not HRT compatible. + Cannot switch while in HRT/NOHZ mode. + +The watchdog code is what enables CLOCK_SOURCE_VALID_FOR_HRES, but we +actually end up selecting the clocksource before we enqueue it into +the watchdog list, so that's why we see the warning and fail to switch +to acpi_pm timer as requested. 
That's particularly bad when we want to +debug timekeeping related problems in early boot. + +Put the selection call last. + +Reported-by: Christian Hoffmann <email@christianhoffmann.info> +Signed-off-by: John Stultz <johnstul@us.ibm.com> +Link: http://lkml.kernel.org/r/%3C1304558210.2943.24.camel%40work-vm%3E +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/time/clocksource.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -685,8 +685,8 @@ int __clocksource_register_scale(struct + /* Add clocksource to the clcoksource list */ + mutex_lock(&clocksource_mutex); + clocksource_enqueue(cs); +- clocksource_select(); + clocksource_enqueue_watchdog(cs); ++ clocksource_select(); + mutex_unlock(&clocksource_mutex); + return 0; + } +@@ -706,8 +706,8 @@ int clocksource_register(struct clocksou + + mutex_lock(&clocksource_mutex); + clocksource_enqueue(cs); +- clocksource_select(); + clocksource_enqueue_watchdog(cs); ++ clocksource_select(); + mutex_unlock(&clocksource_mutex); + return 0; + } diff --git a/queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch b/queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch new file mode 100644 index 0000000000..1471fd1f72 --- /dev/null +++ b/queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch @@ -0,0 +1,47 @@ +From 05fa7ea7d23980de0014417a0e0af2048a0f9fc1 Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexdeucher@gmail.com> +Date: Wed, 11 May 2011 14:02:07 -0400 +Subject: drm/radeon/kms: fix extended lvds info parsing + +From: Alex Deucher <alexdeucher@gmail.com> + +commit 05fa7ea7d23980de0014417a0e0af2048a0f9fc1 upstream. + +On rev <= 1.1 tables, the offset is absolute, +on newer tables, it's relative. 
+ +Fixes: +https://bugzilla.redhat.com/show_bug.cgi?id=700326 + +Signed-off-by: Alex Deucher <alexdeucher@gmail.com> +Reviewed-by: Jerome Glisse <jglisse@redhat.com> +Signed-off-by: Dave Airlie <airlied@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/gpu/drm/radeon/radeon_atombios.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/radeon/radeon_atombios.c ++++ b/drivers/gpu/drm/radeon/radeon_atombios.c +@@ -1574,9 +1574,17 @@ struct radeon_encoder_atom_dig *radeon_a + ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record; + ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record; + bool bad_record = false; +- u8 *record = (u8 *)(mode_info->atom_context->bios + +- data_offset + +- le16_to_cpu(lvds_info->info.usModePatchTableOffset)); ++ u8 *record; ++ ++ if ((frev == 1) && (crev < 2)) ++ /* absolute */ ++ record = (u8 *)(mode_info->atom_context->bios + ++ le16_to_cpu(lvds_info->info.usModePatchTableOffset)); ++ else ++ /* relative */ ++ record = (u8 *)(mode_info->atom_context->bios + ++ data_offset + ++ le16_to_cpu(lvds_info->info.usModePatchTableOffset)); + while (*record != ATOM_RECORD_END_TYPE) { + switch (*record) { + case LCD_MODE_PATCH_RECORD_MODE_TYPE: diff --git a/queue-2.6.38/fix-cx88-remote-control-input.patch b/queue-2.6.38/fix-cx88-remote-control-input.patch new file mode 100644 index 0000000000..b0109b9aff --- /dev/null +++ b/queue-2.6.38/fix-cx88-remote-control-input.patch @@ -0,0 +1,32 @@ +From 2a164d02dd34c6b49a3f0995900e0f8af102b804 Mon Sep 17 00:00:00 2001 +From: Lawrence Rust <lvr@softsystem.co.uk> +Date: Fri, 8 Apr 2011 09:50:45 -0300 +Subject: [media] Fix cx88 remote control input + +From: Lawrence Rust <lvr@softsystem.co.uk> + +commit 2a164d02dd34c6b49a3f0995900e0f8af102b804 upstream. + +In the IR interrupt handler of cx88-input.c there's a 32-bit multiply +overflow which causes IR pulse durations to be incorrectly calculated. 
+ +This is a regression caused by commit 2997137be8eba. + +Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/media/video/cx88/cx88-input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/media/video/cx88/cx88-input.c ++++ b/drivers/media/video/cx88/cx88-input.c +@@ -523,7 +523,7 @@ void cx88_ir_irq(struct cx88_core *core) + for (todo = 32; todo > 0; todo -= bits) { + ev.pulse = samples & 0x80000000 ? false : true; + bits = min(todo, 32U - fls(ev.pulse ? samples : ~samples)); +- ev.duration = (bits * NSEC_PER_SEC) / (1000 * ir_samplerate); ++ ev.duration = (bits * (NSEC_PER_SEC / 1000)) / ir_samplerate; + ir_raw_event_store_with_filter(ir->dev, &ev); + samples <<= bits; + } diff --git a/queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch b/queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch new file mode 100644 index 0000000000..07ce52a7df --- /dev/null +++ b/queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch @@ -0,0 +1,75 @@ +From 5f6f12ccf3aa42cfc0c5bde9228df0c843dd63f7 Mon Sep 17 00:00:00 2001 +From: Tejun Heo <tj@kernel.org> +Date: Mon, 9 May 2011 16:04:11 +0200 +Subject: libata: fix oops when LPM is used with PMP + +From: Tejun Heo <tj@kernel.org> + +commit 5f6f12ccf3aa42cfc0c5bde9228df0c843dd63f7 upstream. + +ae01b2493c (libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65) +added ATA_FLAG_NO_DIPM and made ata_eh_set_lpm() check the flag. +However, @ap is NULL if @link points to a PMP link and thus the +unconditional @ap->flags dereference leads to the following oops. + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 + IP: [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510 + ... + Pid: 295, comm: scsi_eh_4 Tainted: P 2.6.38.5-core2 #1 System76, Inc. 
Serval Professional/Serval Professional + RIP: 0010:[<ffffffff813f98e1>] [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510 + RSP: 0018:ffff880132defbf0 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: ffff880132f40000 RCX: 0000000000000000 + RDX: ffff88013377c000 RSI: ffff880132f40000 RDI: 0000000000000000 + RBP: ffff880132defce0 R08: ffff88013377dc58 R09: ffff880132defd98 + R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000 + R13: 0000000000000000 R14: ffff88013377c000 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff8800bf700000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: 0000000000000018 CR3: 0000000001a03000 CR4: 00000000000406e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process scsi_eh_4 (pid: 295, threadinfo ffff880132dee000, task ffff880133b416c0) + Stack: + 0000000000000000 ffff880132defcc0 0000000000000000 ffff880132f42738 + ffffffff813ee8f0 ffffffff813eefe0 ffff880132defd98 ffff88013377f190 + ffffffffa00b3e30 ffffffff813ef030 0000000032defc60 ffff880100000000 + Call Trace: + [<ffffffff81400867>] sata_pmp_error_handler+0x607/0xc30 + [<ffffffffa00b273f>] ahci_error_handler+0x1f/0x70 [libahci] + [<ffffffff813faade>] ata_scsi_error+0x5be/0x900 + [<ffffffff813cf724>] scsi_error_handler+0x124/0x650 + [<ffffffff810834b6>] kthread+0x96/0xa0 + [<ffffffff8100cd64>] kernel_thread_helper+0x4/0x10 + Code: 8b 95 70 ff ff ff b8 00 00 00 00 48 3b 9a 10 2e 00 00 48 0f 44 c2 48 89 85 70 ff ff ff 48 8b 8d 70 ff ff ff f6 83 69 02 00 00 01 <48> 8b 41 18 0f 85 48 01 00 00 48 85 c9 74 12 48 8b 51 08 48 83 + RIP [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510 + RSP <ffff880132defbf0> + CR2: 0000000000000018 + +Fix it by testing @link->ap->flags instead. + +stable: ATA_FLAG_NO_DIPM was added during 2.6.39 cycle but was + backported to 2.6.37 and 38. This is a fix for that and thus + also applicable to 2.6.37 and 38. 
+ +Signed-off-by: Tejun Heo <tj@kernel.org> +Reported-by: "Nathan A. Mourey II" <nmoureyii@ne.rr.com> +LKML-Reference: <1304555277.2059.2.camel@localhost.localdomain> +Cc: Connor H <cmdkhh@gmail.com> +Signed-off-by: Jeff Garzik <jgarzik@pobox.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/ata/libata-eh.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/ata/libata-eh.c ++++ b/drivers/ata/libata-eh.c +@@ -3276,7 +3276,7 @@ static int ata_eh_set_lpm(struct ata_lin + struct ata_eh_context *ehc = &link->eh_context; + struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; + enum ata_lpm_policy old_policy = link->lpm_policy; +- bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; ++ bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; + unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; + unsigned int err_mask; + int rc; diff --git a/queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch b/queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch new file mode 100644 index 0000000000..8db82c020c --- /dev/null +++ b/queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch @@ -0,0 +1,89 @@ +From 98cb7e4413d189cd2b54daf993a4667d9788c0bb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no> +Date: Wed, 19 Jan 2011 10:01:14 +0100 +Subject: [SCSI] megaraid_sas: Sanity check user supplied length before passing it to dma_alloc_coherent() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no> + +commit 98cb7e4413d189cd2b54daf993a4667d9788c0bb upstream. + +The ioc->sgl[i].iov_len value is supplied by the ioctl caller, and can be +zero in some cases. Assume that's valid and continue without error. 
+ +Fixes (multiple individual reports of the same problem for quite a while): + +http://marc.info/?l=linux-ide&m=128941801715301 +http://bugs.debian.org/604627 +http://www.mail-archive.com/linux-poweredge@dell.com/msg02575.html + +megasas: Failed to alloc kernel SGL buffer for IOCTL + +and + +[ 69.162538] ------------[ cut here ]------------ +[ 69.162806] kernel BUG at /build/buildd/linux-2.6.32/lib/swiotlb.c:368! +[ 69.163134] invalid opcode: 0000 [#1] SMP +[ 69.163570] last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map +[ 69.163975] CPU 0 +[ 69.164227] Modules linked in: fbcon tileblit font bitblit softcursor vga16fb vgastate ioatdma radeon ttm drm_kms_helper shpchp drm i2c_algo_bit lp parport floppy pata_jmicron megaraid_sas igb dca +[ 69.167419] Pid: 1206, comm: smartctl Tainted: G W 2.6.32-25-server #45-Ubuntu X8DTN +[ 69.167843] RIP: 0010:[<ffffffff812c4dc5>] [<ffffffff812c4dc5>] map_single+0x255/0x260 +[ 69.168370] RSP: 0018:ffff88081c0ebc58 EFLAGS: 00010246 +[ 69.168655] RAX: 000000000003bffc RBX: 00000000ffffffff RCX: 0000000000000002 +[ 69.169000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001dffe000 +[ 69.169346] RBP: ffff88081c0ebcb8 R08: 0000000000000000 R09: ffff880000030840 +[ 69.169691] R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000000 +[ 69.170036] R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000200000 +[ 69.170382] FS: 00007fb8de189720(0000) GS:ffff88001de00000(0000) knlGS:0000000000000000 +[ 69.170794] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 69.171094] CR2: 00007fb8dd59237c CR3: 000000081a790000 CR4: 00000000000006f0 +[ 69.171439] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 69.171784] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 69.172130] Process smartctl (pid: 1206, threadinfo ffff88081c0ea000, task ffff88081a760000) +[ 69.194513] Stack: +[ 69.205788] 0000000000000034 00000002817e3390 0000000000000000 ffff88081c0ebe00 +[ 
69.217739] <0> 0000000000000000 000000000003bffc 0000000000000000 0000000000000000 +[ 69.241250] <0> 0000000000000000 00000000ffffffff ffff88081c5b4080 ffff88081c0ebe00 +[ 69.277310] Call Trace: +[ 69.289278] [<ffffffff812c52ac>] swiotlb_alloc_coherent+0xec/0x130 +[ 69.301118] [<ffffffff81038b31>] x86_swiotlb_alloc_coherent+0x61/0x70 +[ 69.313045] [<ffffffffa002d0ce>] megasas_mgmt_fw_ioctl+0x1ae/0x690 [megaraid_sas] +[ 69.336399] [<ffffffffa002d748>] megasas_mgmt_ioctl_fw+0x198/0x240 [megaraid_sas] +[ 69.359346] [<ffffffffa002f695>] megasas_mgmt_ioctl+0x35/0x50 [megaraid_sas] +[ 69.370902] [<ffffffff81153b12>] vfs_ioctl+0x22/0xa0 +[ 69.382322] [<ffffffff8115da2a>] ? alloc_fd+0x10a/0x150 +[ 69.393622] [<ffffffff81153cb1>] do_vfs_ioctl+0x81/0x410 +[ 69.404696] [<ffffffff8155cc13>] ? do_page_fault+0x153/0x3b0 +[ 69.415761] [<ffffffff811540c1>] sys_ioctl+0x81/0xa0 +[ 69.426640] [<ffffffff810121b2>] system_call_fastpath+0x16/0x1b +[ 69.437491] Code: fe ff ff 48 8b 3d 74 38 76 00 41 bf 00 00 20 00 e8 51 f5 d7 ff 83 e0 ff 48 05 ff 07 00 00 48 c1 e8 0b 48 89 45 c8 e9 13 fe ff ff <0f> 0b eb fe 0f 1f 80 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89 +[ 69.478216] RIP [<ffffffff812c4dc5>] map_single+0x255/0x260 +[ 69.489668] RSP <ffff88081c0ebc58> +[ 69.500975] ---[ end trace 6a2181b634e2abc7 ]--- + +Reported-by: Bokhan Artem <aptem@ngs.ru> +Reported by: Marc-Christian Petersen <m.c.p@gmx.de> +Signed-off-by: Bjørn Mork <bjorn@mork.no> +Cc: "Benz, Michael" <Michael.Benz@lsi.com> +Signed-off-by: James Bottomley <James.Bottomley@suse.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -4611,6 +4611,9 @@ megasas_mgmt_fw_ioctl(struct megasas_ins + * For each user buffer, create a mirror buffer and copy in + */ + for (i = 0; i < ioc->sge_count; i++) { ++ if (!ioc->sgl[i].iov_len) ++ 
continue; ++ + kbuff_arr[i] = dma_alloc_coherent(&instance->pdev->dev, + ioc->sgl[i].iov_len, + &buf_handle, GFP_KERNEL); diff --git a/queue-2.6.38/rapidio-fix-default-routing-initialization.patch b/queue-2.6.38/rapidio-fix-default-routing-initialization.patch new file mode 100644 index 0000000000..b502ab933f --- /dev/null +++ b/queue-2.6.38/rapidio-fix-default-routing-initialization.patch @@ -0,0 +1,94 @@ +From 0bf2461fdd9008290cf429e50e4f362dafab4249 Mon Sep 17 00:00:00 2001 +From: Alexandre Bounine <alexandre.bounine@idt.com> +Date: Tue, 17 May 2011 15:44:08 -0700 +Subject: rapidio: fix default routing initialization + +From: Alexandre Bounine <alexandre.bounine@idt.com> + +commit 0bf2461fdd9008290cf429e50e4f362dafab4249 upstream. + +Fix switch initialization to ensure that all switches have default routing +disabled. This guarantees that no unexpected RapidIO packets arrive to +the default port set by reset and there is no default routing destination +until it is properly configured by software. + +This update also unifies handling of unmapped destinations by tsi57x, IDT +Gen1 and IDT Gen2 switches. 
+ +Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com> +Cc: Kumar Gala <galak@kernel.crashing.org> +Cc: Matt Porter <mporter@kernel.crashing.org> +Cc: Li Yang <leoli@freescale.com> +Cc: Thomas Moll <thomas.moll@sysgo.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/rapidio/switches/idt_gen2.c | 9 +++++++++ + drivers/rapidio/switches/idtcps.c | 6 ++++++ + drivers/rapidio/switches/tsi57x.c | 6 ++++++ + 3 files changed, 21 insertions(+) + +--- a/drivers/rapidio/switches/idt_gen2.c ++++ b/drivers/rapidio/switches/idt_gen2.c +@@ -95,6 +95,9 @@ idtg2_route_add_entry(struct rio_mport * + else + table++; + ++ if (route_port == RIO_INVALID_ROUTE) ++ route_port = IDT_DEFAULT_ROUTE; ++ + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + +@@ -411,6 +414,12 @@ static int idtg2_switch_init(struct rio_ + rdev->rswitch->em_handle = idtg2_em_handler; + rdev->rswitch->sw_sysfs = idtg2_sysfs; + ++ if (do_enum) { ++ /* Ensure that default routing is disabled on startup */ ++ rio_write_config_32(rdev, ++ RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE); ++ } ++ + return 0; + } + +--- a/drivers/rapidio/switches/idtcps.c ++++ b/drivers/rapidio/switches/idtcps.c +@@ -26,6 +26,9 @@ idtcps_route_add_entry(struct rio_mport + { + u32 result; + ++ if (route_port == RIO_INVALID_ROUTE) ++ route_port = CPS_DEFAULT_ROUTE; ++ + if (table == RIO_GLOBAL_TABLE) { + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid); +@@ -130,6 +133,9 @@ static int idtcps_switch_init(struct rio + /* set TVAL = ~50us */ + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); ++ /* Ensure that default routing is disabled on startup */ ++ rio_write_config_32(rdev, ++ RIO_STD_RTE_DEFAULT_PORT, CPS_NO_ROUTE); + } + + return 0; +--- 
a/drivers/rapidio/switches/tsi57x.c ++++ b/drivers/rapidio/switches/tsi57x.c +@@ -303,6 +303,12 @@ static int tsi57x_switch_init(struct rio + rdev->rswitch->em_init = tsi57x_em_init; + rdev->rswitch->em_handle = tsi57x_em_handler; + ++ if (do_enum) { ++ /* Ensure that default routing is disabled on startup */ ++ rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT, ++ RIO_INVALID_ROUTE); ++ } ++ + return 0; + } + diff --git a/queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch b/queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch new file mode 100644 index 0000000000..7c33f86a73 --- /dev/null +++ b/queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch @@ -0,0 +1,78 @@ +From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 +From: Chris Ball <cjb@laptop.org> +Date: Mon, 16 May 2011 11:32:26 -0400 +Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" + +From: Chris Ball <cjb@laptop.org> + +commit 86f315bbb2374f1f077500ad131dd9b71856e697 upstream. + +This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has +been reported to cause boot/resume-time crashes for some users: + +https://bbs.archlinux.org/viewtopic.php?id=118751. 
+ +Signed-off-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/mmc/core/host.c | 9 +++++---- + include/linux/mmc/host.h | 1 + + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/mmc/core/host.c ++++ b/drivers/mmc/core/host.c +@@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(st + spin_unlock_irqrestore(&host->clk_lock, flags); + return; + } +- mmc_claim_host(host); ++ mutex_lock(&host->clk_gate_mutex); + spin_lock_irqsave(&host->clk_lock, flags); + if (!host->clk_requests) { + spin_unlock_irqrestore(&host->clk_lock, flags); +@@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(st + pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); + } + spin_unlock_irqrestore(&host->clk_lock, flags); +- mmc_release_host(host); ++ mutex_unlock(&host->clk_gate_mutex); + } + + /* +@@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host + { + unsigned long flags; + +- mmc_claim_host(host); ++ mutex_lock(&host->clk_gate_mutex); + spin_lock_irqsave(&host->clk_lock, flags); + if (host->clk_gated) { + spin_unlock_irqrestore(&host->clk_lock, flags); +@@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host + } + host->clk_requests++; + spin_unlock_irqrestore(&host->clk_lock, flags); +- mmc_release_host(host); ++ mutex_unlock(&host->clk_gate_mutex); + } + + /** +@@ -218,6 +218,7 @@ static inline void mmc_host_clk_init(str + host->clk_gated = false; + INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); + spin_lock_init(&host->clk_lock); ++ mutex_init(&host->clk_gate_mutex); + } + + /** +--- a/include/linux/mmc/host.h ++++ b/include/linux/mmc/host.h +@@ -183,6 +183,7 @@ struct mmc_host { + struct work_struct clk_gate_work; /* delayed clock gate */ + unsigned int clk_old; /* old clock value cache */ + spinlock_t clk_lock; /* lock for clk fields */ ++ struct mutex clk_gate_mutex; /* mutex for clock gating */ + #endif + + /* host specific block data */ diff --git 
a/queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch b/queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch new file mode 100644 index 0000000000..d9e5946168 --- /dev/null +++ b/queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch @@ -0,0 +1,47 @@ +From 328935e6348c6a7cb34798a68c326f4b8372e68a Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <borislav.petkov@amd.com> +Date: Tue, 17 May 2011 14:55:18 +0200 +Subject: Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors" + +From: Borislav Petkov <borislav.petkov@amd.com> + +commit 328935e6348c6a7cb34798a68c326f4b8372e68a upstream. + +This reverts commit e20a2d205c05cef6b5783df339a7d54adeb50962, as it crashes +certain boxes with specific AMD CPU models. + +Moving the lower endpoint of the Erratum 400 check to accommodate +earlier K8 revisions (A-E) opens a can of worms which is simply +not worth fixing properly by tweaking the errata checking +framework: + +* missing IntPending MSR on revisions < CG causes #GP: + +http://marc.info/?l=linux-kernel&m=130541471818831 + +* makes earlier revisions use the LAPIC timer instead of the C1E +idle routine which switches to HPET, thus not waking up in +deeper C-states: + +http://lkml.org/lkml/2011/4/24/20 + +Therefore, leave the original boundary starting with K8-revF. 
+ +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/kernel/cpu/amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -681,7 +681,7 @@ cpu_dev_register(amd_cpu_dev); + */ + + const int amd_erratum_400[] = +- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), ++ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), + AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); + EXPORT_SYMBOL_GPL(amd_erratum_400); + diff --git a/queue-2.6.38/series b/queue-2.6.38/series index f3f7015c20..63a266cfb5 100644 --- a/queue-2.6.38/series +++ b/queue-2.6.38/series @@ -43,3 +43,25 @@ hydra-fix-regression-caused-during-net_device_ops-conversion.patch ehea-fix-memory-hotplug-oops.patch libertas-fix-cmdpendingq-locking.patch zorro8390-fix-regression-caused-during-net_device_ops-conversion.patch +tmpfs-fix-race-between-umount-and-writepage.patch +tmpfs-fix-race-between-swapoff-and-writepage.patch +tmpfs-fix-off-by-one-in-max_blocks-checks.patch +tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch +libata-fix-oops-when-lpm-is-used-with-pmp.patch +drm-radeon-kms-fix-extended-lvds-info-parsing.patch +revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch +cifs-add-fallback-in-is_path_accessible-for-old-servers.patch +rapidio-fix-default-routing-initialization.patch +revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch +x86-amd-fix-arat-feature-setting-again.patch +block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch +clocksource-install-completely-before-selecting.patch +tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch +x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch +fix-cx88-remote-control-input.patch +v4l-release-module-if-subdev-registration-fails.patch +x86-fix-uv-bau-for-non-consecutive-nasids.patch 
+x86-mce-amd-fix-leaving-freed-data-in-a-list.patch +megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch +cdrom-always-check_disk_change-on-open.patch +vmxnet3-fix-inconsistent-lro-state-after-initialization.patch diff --git a/queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch b/queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch new file mode 100644 index 0000000000..c494dc3f19 --- /dev/null +++ b/queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch @@ -0,0 +1,85 @@ +From 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon, 16 May 2011 11:07:48 +0200 +Subject: tick: Clear broadcast active bit when switching to oneshot + +From: Thomas Gleixner <tglx@linutronix.de> + +commit 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a upstream. + +The first cpu which switches from periodic to oneshot mode switches +also the broadcast device into oneshot mode. The broadcast device +serves as a backup for per cpu timers which stop in deeper +C-states. To avoid starvation of the cpus which might be in idle and +depend on broadcast mode it marks the other cpus as broadcast active +and sets the broadcast expiry value of those cpus to the next tick. + +The oneshot mode broadcast bit for the other cpus is sticky and gets +only cleared when those cpus exit idle. If a cpu was not idle while +the bit got set in consequence the bit prevents that the broadcast +device is armed on behalf of that cpu when it enters idle for the +first time after it switched to oneshot mode. + +In most cases that goes unnoticed as one of the other cpus has usually +a timer pending which keeps the broadcast device armed with a short +timeout. Now if the only cpu which has a short timer active has the +bit set then the broadcast device will not be armed on behalf of that +cpu and will fire way after the expected timer expiry. 
In the case of +Christian's bug report it took ~145 seconds which is about half of the +wrap around time of HPET (the limit for that device) due to the fact +that all other cpus had no timers armed which expired before the 145 +seconds timeframe. + +The solution is simply to clear the broadcast active bit +unconditionally when a cpu switches to oneshot mode after the first +cpu switched the broadcast device over. It's not idle at that point +otherwise it would not be executing that code. + +[ I fundamentally hate that broadcast crap. Why the heck thought some + folks that when going into deep idle it's a brilliant concept to + switch off the last device which brings the cpu back from that + state? ] + +Thanks to Christian for providing all the valuable debug information! + +Reported-and-tested-by: Christian Hoffmann <email@christianhoffmann.info> +Cc: John Stultz <johnstul@us.ibm.com> +Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1105161105170.3078%40ionos%3E +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/time/tick-broadcast.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/kernel/time/tick-broadcast.c ++++ b/kernel/time/tick-broadcast.c +@@ -523,10 +523,11 @@ static void tick_broadcast_init_next_eve + */ + void tick_broadcast_setup_oneshot(struct clock_event_device *bc) + { ++ int cpu = smp_processor_id(); ++ + /* Set it up only once ! 
*/ + if (bc->event_handler != tick_handle_oneshot_broadcast) { + int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; +- int cpu = smp_processor_id(); + + bc->event_handler = tick_handle_oneshot_broadcast; + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); +@@ -552,6 +553,15 @@ void tick_broadcast_setup_oneshot(struct + tick_broadcast_set_event(tick_next_period, 1); + } else + bc->next_event.tv64 = KTIME_MAX; ++ } else { ++ /* ++ * The first cpu which switches to oneshot mode sets ++ * the bit for all other cpus which are in the general ++ * (periodic) broadcast mask. So the bit is set and ++ * would prevent the first broadcast enter after this ++ * to program the bc device. ++ */ ++ tick_broadcast_clear_oneshot(cpu); + } + } + diff --git a/queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch b/queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch new file mode 100644 index 0000000000..463fb1320d --- /dev/null +++ b/queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch @@ -0,0 +1,49 @@ +From fc5da22ae35d4720be59af8787a8a6d5e4da9517 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Thu, 14 Apr 2011 15:22:07 -0700 +Subject: tmpfs: fix off-by-one in max_blocks checks + +From: Hugh Dickins <hughd@google.com> + +commit fc5da22ae35d4720be59af8787a8a6d5e4da9517 upstream. + +If you fill up a tmpfs, df was showing + + tmpfs 460800 - - - /tmp + +because of an off-by-one in the max_blocks checks. 
Fix it so df shows + + tmpfs 460800 460800 0 100% /tmp + +Signed-off-by: Hugh Dickins <hughd@google.com> +Cc: Tim Chen <tim.c.chen@linux.intel.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + mm/shmem.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -422,7 +422,8 @@ static swp_entry_t *shmem_swp_alloc(stru + * a waste to allocate index if we cannot allocate data. + */ + if (sbinfo->max_blocks) { +- if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0) ++ if (percpu_counter_compare(&sbinfo->used_blocks, ++ sbinfo->max_blocks - 1) >= 0) + return ERR_PTR(-ENOSPC); + percpu_counter_inc(&sbinfo->used_blocks); + spin_lock(&inode->i_lock); +@@ -1404,7 +1405,8 @@ repeat: + shmem_swp_unmap(entry); + sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) { +- if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) || ++ if (percpu_counter_compare(&sbinfo->used_blocks, ++ sbinfo->max_blocks) >= 0 || + shmem_acct_block(info->flags)) { + spin_unlock(&info->lock); + error = -ENOSPC; diff --git a/queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch b/queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch new file mode 100644 index 0000000000..f929e6e039 --- /dev/null +++ b/queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch @@ -0,0 +1,55 @@ +From 05bf86b4ccfd0f197da61c67bd372111d15a6620 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Sat, 14 May 2011 12:06:42 -0700 +Subject: tmpfs: fix race between swapoff and writepage + +From: Hugh Dickins <hughd@google.com> + +commit 05bf86b4ccfd0f197da61c67bd372111d15a6620 upstream. + +Shame on me! 
Commit b1dea800ac39 "tmpfs: fix race between umount and +writepage" fixed the advertized race, but introduced another: as even +its comment makes clear, we cannot safely rely on a peek at list_empty() +while holding no lock - until info->swapped is set, shmem_unuse_inode() +may delete any formerly-swapped inode from the shmem_swaplist, which +in this case would leave a swap area impossible to swapoff. + +Although I don't relish taking the mutex every time, I don't care much +for the alternatives either; and at least the peek at list_empty() in +shmem_evict_inode() (a hotter path since most inodes would never have +been swapped) remains safe, because we already truncated the whole file. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1037,7 +1037,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) + struct address_space *mapping; + unsigned long index; + struct inode *inode; +- bool unlock_mutex = false; + + BUG_ON(!PageLocked(page)); + mapping = page->mapping; +@@ -1072,15 +1071,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) + * we've taken the spinlock, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under both locks. 
+ */ +- if (swap.val && list_empty(&info->swaplist)) { ++ if (swap.val) { + mutex_lock(&shmem_swaplist_mutex); +- /* move instead of add in case we're racing */ +- list_move_tail(&info->swaplist, &shmem_swaplist); +- unlock_mutex = true; ++ if (list_empty(&info->swaplist)) ++ list_add_tail(&info->swaplist, &shmem_swaplist); + } + + spin_lock(&info->lock); +- if (unlock_mutex) ++ if (swap.val) + mutex_unlock(&shmem_swaplist_mutex); + + if (index >= info->next_index) { diff --git a/queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch b/queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch new file mode 100644 index 0000000000..9a1234a736 --- /dev/null +++ b/queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch @@ -0,0 +1,158 @@ +From b1dea800ac39599301d4bb8dcf2b1d29c2558211 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Wed, 11 May 2011 15:13:36 -0700 +Subject: tmpfs: fix race between umount and writepage + +From: Hugh Dickins <hughd@google.com> + +commit b1dea800ac39599301d4bb8dcf2b1d29c2558211 upstream. + +Konstanin Khlebnikov reports that a dangerous race between umount and +shmem_writepage can be reproduced by this script: + + for i in {1..300} ; do + mkdir $i + while true ; do + mount -t tmpfs none $i + dd if=/dev/zero of=$i/test bs=1M count=$(($RANDOM % 100)) + umount $i + done & + done + +on a 6xCPU node with 8Gb RAM: kernel very unstable after this accident. =) + +Kernel log: + + VFS: Busy inodes after unmount of tmpfs. + Self-destruct in 5 seconds. Have a nice day... + + WARNING: at lib/list_debug.c:53 __list_del_entry+0x8d/0x98() + list_del corruption. prev->next should be ffff880222fdaac8, but was (null) + Pid: 11222, comm: mount.tmpfs Not tainted 2.6.39-rc2+ #4 + Call Trace: + warn_slowpath_common+0x80/0x98 + warn_slowpath_fmt+0x41/0x43 + __list_del_entry+0x8d/0x98 + evict+0x50/0x113 + iput+0x138/0x141 + ... 
+ BUG: unable to handle kernel paging request at ffffffffffffffff + IP: shmem_free_blocks+0x18/0x4c + Pid: 10422, comm: dd Tainted: G W 2.6.39-rc2+ #4 + Call Trace: + shmem_recalc_inode+0x61/0x66 + shmem_writepage+0xba/0x1dc + pageout+0x13c/0x24c + shrink_page_list+0x28e/0x4be + shrink_inactive_list+0x21f/0x382 + ... + +shmem_writepage() calls igrab() on the inode for the page which came from +page reclaim, to add it later into shmem_swaplist for swapoff operation. + +This igrab() can race with super-block deactivating process: + + shrink_inactive_list() deactivate_super() + pageout() tmpfs_fs_type->kill_sb() + shmem_writepage() kill_litter_super() + generic_shutdown_super() + evict_inodes() + igrab() + atomic_read(&inode->i_count) + skip-inode + iput() + if (!list_empty(&sb->s_inodes)) + printk("VFS: Busy inodes after... + +This igrab-iput pair was added in commit 1b1b32f2c6f6 "tmpfs: fix +shmem_swaplist races" based on incorrect assumptions: igrab() protects the +inode from concurrent eviction by deletion, but it does nothing to protect +it from concurrent unmounting, which goes ahead despite the raised +i_count. + +So this use of igrab() was wrong all along, but the race was made much worse +in 2.6.37 when commit 63997e98a3be "split invalidate_inodes()" replaced +two attempts at invalidate_inodes() by a single evict_inodes(). + +Konstantin posted a plausible patch, raising sb->s_active too: I'm unsure +whether it was correct or not; but burnt once by igrab(), I am sure that +we don't want to rely more deeply upon externals here. + +Fix it by adding the inode to shmem_swaplist earlier, while the page lock +on page in page cache still secures the inode against eviction, without +artificially raising i_count. It was originally added later because +shmem_unuse_inode() is liable to remove an inode from the list while it's +unswapped; but we can guard against that by taking spinlock before +dropping mutex. 
+ +Reported-by: Konstantin Khlebnikov <khlebnikov@openvz.org> +Signed-off-by: Hugh Dickins <hughd@google.com> +Tested-by: Konstantin Khlebnikov <khlebnikov@openvz.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + mm/shmem.c | 31 ++++++++++++++++++++----------- + 1 file changed, 20 insertions(+), 11 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1037,6 +1037,7 @@ static int shmem_writepage(struct page * + struct address_space *mapping; + unsigned long index; + struct inode *inode; ++ bool unlock_mutex = false; + + BUG_ON(!PageLocked(page)); + mapping = page->mapping; +@@ -1062,7 +1063,26 @@ static int shmem_writepage(struct page * + else + swap.val = 0; + ++ /* ++ * Add inode to shmem_unuse()'s list of swapped-out inodes, ++ * if it's not already there. Do it now because we cannot take ++ * mutex while holding spinlock, and must do so before the page ++ * is moved to swap cache, when its pagelock no longer protects ++ * the inode from eviction. But don't unlock the mutex until ++ * we've taken the spinlock, because shmem_unuse_inode() will ++ * prune a !swapped inode from the swaplist under both locks. 
++ */ ++ if (swap.val && list_empty(&info->swaplist)) { ++ mutex_lock(&shmem_swaplist_mutex); ++ /* move instead of add in case we're racing */ ++ list_move_tail(&info->swaplist, &shmem_swaplist); ++ unlock_mutex = true; ++ } ++ + spin_lock(&info->lock); ++ if (unlock_mutex) ++ mutex_unlock(&shmem_swaplist_mutex); ++ + if (index >= info->next_index) { + BUG_ON(!(info->flags & SHMEM_TRUNCATE)); + goto unlock; +@@ -1082,22 +1102,11 @@ static int shmem_writepage(struct page * + remove_from_page_cache(page); + shmem_swp_set(info, entry, swap.val); + shmem_swp_unmap(entry); +- if (list_empty(&info->swaplist)) +- inode = igrab(inode); +- else +- inode = NULL; + spin_unlock(&info->lock); + swap_shmem_alloc(swap); + BUG_ON(page_mapped(page)); + page_cache_release(page); /* pagecache ref */ + swap_writepage(page, wbc); +- if (inode) { +- mutex_lock(&shmem_swaplist_mutex); +- /* move instead of add in case we're racing */ +- list_move_tail(&info->swaplist, &shmem_swaplist); +- mutex_unlock(&shmem_swaplist_mutex); +- iput(inode); +- } + return 0; + } + diff --git a/queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch b/queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch new file mode 100644 index 0000000000..8b395e5dba --- /dev/null +++ b/queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch @@ -0,0 +1,86 @@ +From 59a16ead572330deb38e5848151d30ed1af754bc Mon Sep 17 00:00:00 2001 +From: Hugh Dickins <hughd@google.com> +Date: Wed, 11 May 2011 15:13:38 -0700 +Subject: tmpfs: fix spurious ENOSPC when racing with unswap + +From: Hugh Dickins <hughd@google.com> + +commit 59a16ead572330deb38e5848151d30ed1af754bc upstream. + +Testing the shmem_swaplist replacements for igrab() revealed another bug: +writes to /dev/loop0 on a tmpfs file which fills its filesystem were +sometimes failing with "Buffer I/O error"s. 
+ +These came from ENOSPC failures of shmem_getpage(), when racing with +swapoff: the same could happen when racing with another shmem_getpage(), +pulling the page in from swap in between our find_lock_page() and our +taking the info->lock (though not in the single-threaded loop case). + +This is unacceptable, and surprising that I've not noticed it before: +it dates back many years, but (presumably) was made a lot easier to +reproduce in 2.6.36, which sited a page preallocation in the race window. + +Fix it by rechecking the page cache before settling on an ENOSPC error. + +Signed-off-by: Hugh Dickins <hughd@google.com> +Cc: Konstantin Khlebnikov <khlebnikov@openvz.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + mm/shmem.c | 32 ++++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 10 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1407,20 +1407,14 @@ repeat: + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks) >= 0 || +- shmem_acct_block(info->flags)) { +- spin_unlock(&info->lock); +- error = -ENOSPC; +- goto failed; +- } ++ shmem_acct_block(info->flags)) ++ goto nospace; + percpu_counter_inc(&sbinfo->used_blocks); + spin_lock(&inode->i_lock); + inode->i_blocks += BLOCKS_PER_PAGE; + spin_unlock(&inode->i_lock); +- } else if (shmem_acct_block(info->flags)) { +- spin_unlock(&info->lock); +- error = -ENOSPC; +- goto failed; +- } ++ } else if (shmem_acct_block(info->flags)) ++ goto nospace; + + if (!filepage) { + int ret; +@@ -1500,6 +1494,24 @@ done: + error = 0; + goto out; + ++nospace: ++ /* ++ * Perhaps the page was brought in from swap between find_lock_page ++ * and taking info->lock? We allow for that at add_to_page_cache_lru, ++ * but must also avoid reporting a spurious ENOSPC while working on a ++ * full tmpfs. 
(When filepage has been passed in to shmem_getpage, it ++ * is already in page cache, which prevents this race from occurring.) ++ */ ++ if (!filepage) { ++ struct page *page = find_get_page(mapping, idx); ++ if (page) { ++ spin_unlock(&info->lock); ++ page_cache_release(page); ++ goto repeat; ++ } ++ } ++ spin_unlock(&info->lock); ++ error = -ENOSPC; + failed: + if (*pagep != filepage) { + unlock_page(filepage); diff --git a/queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch b/queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch new file mode 100644 index 0000000000..1b5bbedbfe --- /dev/null +++ b/queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch @@ -0,0 +1,42 @@ +From b7534f002d3c81d18abfbf57179d07d3ec763bb5 Mon Sep 17 00:00:00 2001 +From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> +Date: Sat, 30 Apr 2011 10:34:05 -0300 +Subject: [media] v4l: Release module if subdev registration fails + +From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> + +commit b7534f002d3c81d18abfbf57179d07d3ec763bb5 upstream. + +If v4l2_device_register_subdev() fails, the reference to the subdev +module taken by the function isn't released. Fix this. 
+ +Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> +Acked-by: Hans Verkuil <hverkuil@xs4all.nl> +Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/media/video/v4l2-device.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/media/video/v4l2-device.c ++++ b/drivers/media/video/v4l2-device.c +@@ -131,14 +131,17 @@ int v4l2_device_register_subdev(struct v + sd->v4l2_dev = v4l2_dev; + if (sd->internal_ops && sd->internal_ops->registered) { + err = sd->internal_ops->registered(sd); +- if (err) ++ if (err) { ++ module_put(sd->owner); + return err; ++ } + } + /* This just returns 0 if either of the two args is NULL */ + err = v4l2_ctrl_add_handler(v4l2_dev->ctrl_handler, sd->ctrl_handler); + if (err) { + if (sd->internal_ops && sd->internal_ops->unregistered) + sd->internal_ops->unregistered(sd); ++ module_put(sd->owner); + return err; + } + spin_lock(&v4l2_dev->lock); diff --git a/queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch b/queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch new file mode 100644 index 0000000000..8953f65446 --- /dev/null +++ b/queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch @@ -0,0 +1,56 @@ +From ebde6f8acba92abfc203585198a54f47e83e2cd0 Mon Sep 17 00:00:00 2001 +From: Thomas Jarosch <thomas.jarosch@intra2net.com> +Date: Mon, 16 May 2011 06:28:15 +0000 +Subject: vmxnet3: Fix inconsistent LRO state after initialization + +From: Thomas Jarosch <thomas.jarosch@intra2net.com> + +commit ebde6f8acba92abfc203585198a54f47e83e2cd0 upstream. + +During initialization of vmxnet3, the state of LRO +gets out of sync with netdev->features. + +This leads to very poor TCP performance in a IP forwarding +setup and is hitting many VMware users. + +Simplified call sequence: +1. vmxnet3_declare_features() initializes "adapter->lro" to true. + +2. 
The kernel automatically disables LRO if IP forwarding is enabled, +so vmxnet3_set_flags() gets called. This also updates netdev->features. + +3. Now vmxnet3_setup_driver_shared() is called. "adapter->lro" is still +set to true and LRO gets enabled again, even though +netdev->features shows it's disabled. + +Fix it by updating "adapter->lro", too. + +The private vmxnet3 adapter flags are scheduled for removal +in net-next, see commit a0d2730c9571aeba793cb5d3009094ee1d8fda35 +"net: vmxnet3: convert to hw_features". + +Patch applies to 2.6.37 / 2.6.38 and 2.6.39-rc6. + +Please CC: comments. + +Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com> +Acked-by: Stephen Hemminger <shemminger@vyatta.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/net/vmxnet3/vmxnet3_ethtool.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c ++++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c +@@ -311,6 +311,9 @@ vmxnet3_set_flags(struct net_device *net + /* toggle the LRO feature*/ + netdev->features ^= NETIF_F_LRO; + ++ /* Update private LRO flag */ ++ adapter->lro = lro_requested; ++ + /* update harware LRO capability accordingly */ + if (lro_requested) + adapter->shared->devRead.misc.uptFeatures |= diff --git a/queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch b/queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch new file mode 100644 index 0000000000..4d4ca8f130 --- /dev/null +++ b/queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch @@ -0,0 +1,43 @@ +From 14fb57dccb6e1defe9f89a66f548fcb24c374c1d Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <borislav.petkov@amd.com> +Date: Tue, 17 May 2011 14:55:19 +0200 +Subject: x86, AMD: Fix ARAT feature setting again + +From: Borislav Petkov <borislav.petkov@amd.com> + +commit 14fb57dccb6e1defe9f89a66f548fcb24c374c1d upstream. 
+ +Trying to enable the local APIC timer on early K8 revisions +uncovers a number of other issues with it, in conjunction with +the C1E enter path on AMD. Fixing those causes much more churn +and troubles than the benefit of using that timer brings so +don't enable it on K8 at all, falling back to the original +functionality the kernel had wrt to that. + +Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com> +Cc: Boris Ostrovsky <Boris.Ostrovsky@amd.com> +Cc: Andreas Herrmann <andreas.herrmann3@amd.com> +Cc: Greg Kroah-Hartman <greg@kroah.com> +Cc: Hans Rosenfeld <hans.rosenfeld@amd.com> +Cc: Nick Bowler <nbowler@elliptictech.com> +Cc: Joerg-Volker-Peetz <jvpeetz@web.de> +Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> +Link: http://lkml.kernel.org/r/1305636919-31165-3-git-send-email-bp@amd64.org +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/kernel/cpu/amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -596,7 +596,7 @@ static void __cpuinit init_amd(struct cp + #endif + + /* As a rule processors have APIC timer running in deep C states */ +- if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) ++ if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + set_cpu_cap(c, X86_FEATURE_ARAT); + + /* diff --git a/queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch b/queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch new file mode 100644 index 0000000000..e4e8f8131a --- /dev/null +++ b/queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch @@ -0,0 +1,94 @@ +From e503f9e4b092e2349a9477a333543de8f3c7f5d9 Mon Sep 17 00:00:00 2001 +From: Youquan Song <youquan.song@intel.com> +Date: Fri, 22 Apr 2011 00:22:43 +0800 +Subject: x86, apic: Fix spurious error interrupts triggering on all non-boot APs 
+ +From: Youquan Song <youquan.song@intel.com> + +commit e503f9e4b092e2349a9477a333543de8f3c7f5d9 upstream. + +This patch fixes a bug reported by a customer, who found +that many unreasonable error interrupts were reported on all +non-boot CPUs (APs) during the system boot stage. + +According to Chapter 10 of Intel Software Developer Manual +Volume 3A, Local APIC may signal an illegal vector error when +an LVT entry is set as an illegal vector value (0~15) under +FIXED delivery mode (bits 8-11 is 0), regardless of whether +the mask bit is set or an interrupt actually happens. These +errors are seen as error interrupts. + +The initial value of thermal LVT entries on all APs always reads +0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI +sequence to them and LVT registers are reset to 0s except for +the mask bits which are set to 1s when APs receive INIT IPI. + +When the BIOS takes over the thermal throttling interrupt, +the LVT thermal delivery mode should be SMI and it is required +from the kernel to keep AP's LVT thermal monitoring register +programmed as such as well. + +This issue happens when BIOS does not take over thermal throttling +interrupt, AP's LVT thermal monitor register will be restored to +0x10000 which means vector 0 and fixed delivery mode, so all APs will +signal illegal vector error interrupts. + +This patch checks if interrupt delivery mode is not fixed mode before +restoring AP's LVT thermal monitor register. 
+ +Signed-off-by: Youquan Song <youquan.song@intel.com> +Acked-by: Suresh Siddha <suresh.b.siddha@intel.com> +Acked-by: Yong Wang <yong.y.wang@intel.com> +Cc: hpa@linux.intel.com +Cc: joe@perches.com +Cc: jbaron@redhat.com +Cc: trenn@suse.de +Cc: kent.liu@intel.com +Cc: chaohong.guo@intel.com +Link: http://lkml.kernel.org/r/1303402963-17738-1-git-send-email-youquan.song@intel.com +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/include/asm/apicdef.h | 1 + + arch/x86/kernel/cpu/mcheck/therm_throt.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/apicdef.h ++++ b/arch/x86/include/asm/apicdef.h +@@ -78,6 +78,7 @@ + #define APIC_DEST_LOGICAL 0x00800 + #define APIC_DEST_PHYSICAL 0x00000 + #define APIC_DM_FIXED 0x00000 ++#define APIC_DM_FIXED_MASK 0x00700 + #define APIC_DM_LOWEST 0x00100 + #define APIC_DM_SMI 0x00200 + #define APIC_DM_REMRD 0x00300 +--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c ++++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c +@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + ++ h = lvtthmr_init; + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. +- * Always restore the value that BIOS has programmed on AP based on +- * BSP's info we saved since BIOS is always setting the same value +- * for all threads/cores ++ * If BIOS takes over the thermal interrupt and sets its interrupt ++ * delivery mode to SMI (not fixed), it restores the value that the ++ * BIOS has programmed on AP based on BSP's info we saved since BIOS ++ * is always setting the same value for all threads/cores. 
+ */ +- apic_write(APIC_LVTTHMR, lvtthmr_init); ++ if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) ++ apic_write(APIC_LVTTHMR, lvtthmr_init); + +- h = lvtthmr_init; + + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG diff --git a/queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch b/queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch new file mode 100644 index 0000000000..b8b8dd57cc --- /dev/null +++ b/queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch @@ -0,0 +1,335 @@ +From 77ed23f8d995a01cd8101d84351b567bf5177a30 Mon Sep 17 00:00:00 2001 +From: Cliff Wickman <cpw@sgi.com> +Date: Tue, 10 May 2011 08:26:43 -0500 +Subject: x86: Fix UV BAU for non-consecutive nasids + +From: Cliff Wickman <cpw@sgi.com> + +commit 77ed23f8d995a01cd8101d84351b567bf5177a30 upstream. + +This is a fix for the SGI Altix-UV Broadcast Assist Unit code, +which is used for TLB flushing. + +Certain hardware configurations (that customers are ordering) +cause nasids (numa address space id's) to be non-consecutive. +Specifically, once you have more than 4 blades in a IRU +(Individual Rack Unit - or 1/2 rack) but less than the maximum +of 16, the nasid numbering becomes non-consecutive. This +currently results in a 'catastrophic error' (CATERR) detected by +the firmware during OS boot. The BAU is generating an 'INTD' +request that is targeting a non-existent nasid value. Such +configurations may also occur when a blade is configured off +because of hardware errors. (There is one UV hub per blade.) + +This patch is required to support such configurations. + +The problem with the tlb_uv.c code is that is using the +consecutive hub numbers as indices to the BAU distribution bit +map. These are simply the ordinal position of the hub or blade +within its partition. It should be using physical node numbers +(pnodes), which correspond to the physical nasid values. 
Use of +the hub number only works as long as the nasids in the partition +are consecutive and increase with a stride of 1. + +This patch changes the index to be the pnode number, thus +allowing nasids to be non-consecutive. +It also provides a table in local memory for each cpu to +translate target cpu number to target pnode and nasid. +And it improves naming to properly reflect 'node' and 'uvhub' +versus 'nasid'. + +Signed-off-by: Cliff Wickman <cpw@sgi.com> +Link: http://lkml.kernel.org/r/E1QJmxX-0002Mz-Fk@eag09.americas.sgi.com +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/include/asm/uv/uv_bau.h | 17 +++++-- + arch/x86/platform/uv/tlb_uv.c | 92 ++++++++++++++++++++++++++------------- + 2 files changed, 76 insertions(+), 33 deletions(-) + +--- a/arch/x86/include/asm/uv/uv_bau.h ++++ b/arch/x86/include/asm/uv/uv_bau.h +@@ -94,6 +94,8 @@ + /* after this # consecutive successes, bump up the throttle if it was lowered */ + #define COMPLETE_THRESHOLD 5 + ++#define UV_LB_SUBNODEID 0x10 ++ + /* + * number of entries in the destination side payload queue + */ +@@ -124,7 +126,7 @@ + * The distribution specification (32 bytes) is interpreted as a 256-bit + * distribution vector. Adjacent bits correspond to consecutive even numbered + * nodeIDs. The result of adding the index of a given bit to the 15-bit +- * 'base_dest_nodeid' field of the header corresponds to the ++ * 'base_dest_nasid' field of the header corresponds to the + * destination nodeID associated with that specified bit. 
+ */ + struct bau_target_uvhubmask { +@@ -176,7 +178,7 @@ struct bau_msg_payload { + struct bau_msg_header { + unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ + /* bits 5:0 */ +- unsigned int base_dest_nodeid:15; /* nasid of the */ ++ unsigned int base_dest_nasid:15; /* nasid of the */ + /* bits 20:6 */ /* first bit in uvhub map */ + unsigned int command:8; /* message type */ + /* bits 28:21 */ +@@ -378,6 +380,10 @@ struct ptc_stats { + unsigned long d_rcanceled; /* number of messages canceled by resets */ + }; + ++struct hub_and_pnode { ++ short uvhub; ++ short pnode; ++}; + /* + * one per-cpu; to locate the software tables + */ +@@ -399,10 +405,12 @@ struct bau_control { + int baudisabled; + int set_bau_off; + short cpu; ++ short osnode; + short uvhub_cpu; + short uvhub; + short cpus_in_socket; + short cpus_in_uvhub; ++ short partition_base_pnode; + unsigned short message_number; + unsigned short uvhub_quiesce; + short socket_acknowledge_count[DEST_Q_SIZE]; +@@ -422,15 +430,16 @@ struct bau_control { + int congested_period; + cycles_t period_time; + long period_requests; ++ struct hub_and_pnode *target_hub_and_pnode; + }; + + static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) + { + return constant_test_bit(uvhub, &dstp->bits[0]); + } +-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) ++static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) + { +- __set_bit(uvhub, &dstp->bits[0]); ++ __set_bit(pnode, &dstp->bits[0]); + } + static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, + int nbits) +--- a/arch/x86/platform/uv/tlb_uv.c ++++ b/arch/x86/platform/uv/tlb_uv.c +@@ -698,16 +698,17 @@ const struct cpumask *uv_flush_tlb_other + struct mm_struct *mm, + unsigned long va, unsigned int cpu) + { +- int tcpu; +- int uvhub; + int locals = 0; + int remotes = 0; + int hubs = 0; ++ int tcpu; ++ int tpnode; + struct bau_desc *bau_desc; + struct cpumask *flush_mask; 
+ struct ptc_stats *stat; + struct bau_control *bcp; + struct bau_control *tbcp; ++ struct hub_and_pnode *hpp; + + /* kernel was booted 'nobau' */ + if (nobau) +@@ -749,11 +750,18 @@ const struct cpumask *uv_flush_tlb_other + bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; + bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + +- /* cpu statistics */ + for_each_cpu(tcpu, flush_mask) { +- uvhub = uv_cpu_to_blade_id(tcpu); +- bau_uvhub_set(uvhub, &bau_desc->distribution); +- if (uvhub == bcp->uvhub) ++ /* ++ * The distribution vector is a bit map of pnodes, relative ++ * to the partition base pnode (and the partition base nasid ++ * in the header). ++ * Translate cpu to pnode and hub using an array stored ++ * in local memory. ++ */ ++ hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; ++ tpnode = hpp->pnode - bcp->partition_base_pnode; ++ bau_uvhub_set(tpnode, &bau_desc->distribution); ++ if (hpp->uvhub == bcp->uvhub) + locals++; + else + remotes++; +@@ -854,7 +862,7 @@ void uv_bau_message_interrupt(struct pt_ + * an interrupt, but causes an error message to be returned to + * the sender. + */ +-static void uv_enable_timeouts(void) ++static void __init uv_enable_timeouts(void) + { + int uvhub; + int nuvhubs; +@@ -1325,10 +1333,10 @@ static int __init uv_ptc_init(void) + } + + /* +- * initialize the sending side's sending buffers ++ * Initialize the sending side's sending buffers. 
+ */ + static void +-uv_activation_descriptor_init(int node, int pnode) ++uv_activation_descriptor_init(int node, int pnode, int base_pnode) + { + int i; + int cpu; +@@ -1351,11 +1359,11 @@ uv_activation_descriptor_init(int node, + n = pa >> uv_nshift; + m = pa & uv_mmask; + ++ /* the 14-bit pnode */ + uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); +- + /* +- * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each ++ * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * cpu even though we only use the first one; one descriptor can + * describe a broadcast to 256 uv hubs. + */ +@@ -1364,12 +1372,13 @@ uv_activation_descriptor_init(int node, + memset(bd2, 0, sizeof(struct bau_desc)); + bd2->header.sw_ack_flag = 1; + /* +- * base_dest_nodeid is the nasid of the first uvhub +- * in the partition. The bit map will indicate uvhub numbers, +- * which are 0-N in a partition. Pnodes are unique system-wide. ++ * The base_dest_nasid set in the message header is the nasid ++ * of the first uvhub in the partition. The bit map will ++ * indicate destination pnode numbers relative to that base. ++ * They may not be consecutive if nasid striding is being used. 
+ */ +- bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); +- bd2->header.dest_subnodeid = 0x10; /* the LB */ ++ bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); ++ bd2->header.dest_subnodeid = UV_LB_SUBNODEID; + bd2->header.command = UV_NET_ENDPOINT_INTD; + bd2->header.int_both = 1; + /* +@@ -1441,7 +1450,7 @@ uv_payload_queue_init(int node, int pnod + /* + * Initialization of each UV hub's structures + */ +-static void __init uv_init_uvhub(int uvhub, int vector) ++static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) + { + int node; + int pnode; +@@ -1449,11 +1458,11 @@ static void __init uv_init_uvhub(int uvh + + node = uvhub_to_first_node(uvhub); + pnode = uv_blade_to_pnode(uvhub); +- uv_activation_descriptor_init(node, pnode); ++ uv_activation_descriptor_init(node, pnode, base_pnode); + uv_payload_queue_init(node, pnode); + /* +- * the below initialization can't be in firmware because the +- * messaging IRQ will be determined by the OS ++ * The below initialization can't be in firmware because the ++ * messaging IRQ will be determined by the OS. 
+ */ + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, +@@ -1490,10 +1499,11 @@ calculate_destination_timeout(void) + /* + * initialize the bau_control structure for each cpu + */ +-static int __init uv_init_per_cpu(int nuvhubs) ++static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) + { + int i; + int cpu; ++ int tcpu; + int pnode; + int uvhub; + int have_hmaster; +@@ -1527,6 +1537,15 @@ static int __init uv_init_per_cpu(int nu + bcp = &per_cpu(bau_control, cpu); + memset(bcp, 0, sizeof(struct bau_control)); + pnode = uv_cpu_hub_info(cpu)->pnode; ++ if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { ++ printk(KERN_EMERG ++ "cpu %d pnode %d-%d beyond %d; BAU disabled\n", ++ cpu, pnode, base_part_pnode, ++ UV_DISTRIBUTION_SIZE); ++ return 1; ++ } ++ bcp->osnode = cpu_to_node(cpu); ++ bcp->partition_base_pnode = uv_partition_base_pnode; + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); + bdp = &uvhub_descs[uvhub]; +@@ -1535,7 +1554,7 @@ static int __init uv_init_per_cpu(int nu + bdp->pnode = pnode; + /* kludge: 'assuming' one node per socket, and assuming that + disabling a socket just leaves a gap in node numbers */ +- socket = (cpu_to_node(cpu) & 1); ++ socket = bcp->osnode & 1; + bdp->socket_mask |= (1 << socket); + sdp = &bdp->socket[socket]; + sdp->cpu_number[sdp->num_cpus] = cpu; +@@ -1584,6 +1603,20 @@ static int __init uv_init_per_cpu(int nu + nextsocket: + socket++; + socket_mask = (socket_mask >> 1); ++ /* each socket gets a local array of pnodes/hubs */ ++ bcp = smaster; ++ bcp->target_hub_and_pnode = kmalloc_node( ++ sizeof(struct hub_and_pnode) * ++ num_possible_cpus(), GFP_KERNEL, bcp->osnode); ++ memset(bcp->target_hub_and_pnode, 0, ++ sizeof(struct hub_and_pnode) * ++ num_possible_cpus()); ++ for_each_present_cpu(tcpu) { ++ bcp->target_hub_and_pnode[tcpu].pnode = ++ uv_cpu_hub_info(tcpu)->pnode; ++ 
bcp->target_hub_and_pnode[tcpu].uvhub = ++ uv_cpu_hub_info(tcpu)->numa_blade_id; ++ } + } + } + kfree(uvhub_descs); +@@ -1636,21 +1669,22 @@ static int __init uv_bau_init(void) + spin_lock_init(&disable_lock); + congested_cycles = microsec_2_cycles(congested_response_us); + +- if (uv_init_per_cpu(nuvhubs)) { +- nobau = 1; +- return 0; +- } +- + uv_partition_base_pnode = 0x7fffffff; +- for (uvhub = 0; uvhub < nuvhubs; uvhub++) ++ for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + if (uv_blade_nr_possible_cpus(uvhub) && + (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) + uv_partition_base_pnode = uv_blade_to_pnode(uvhub); ++ } ++ ++ if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { ++ nobau = 1; ++ return 0; ++ } + + vector = UV_BAU_MESSAGE; + for_each_possible_blade(uvhub) + if (uv_blade_nr_possible_cpus(uvhub)) +- uv_init_uvhub(uvhub, vector); ++ uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); + + uv_enable_timeouts(); + alloc_intr_gate(vector, uv_bau_message_intr1); diff --git a/queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch b/queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch new file mode 100644 index 0000000000..c8cad64762 --- /dev/null +++ b/queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch @@ -0,0 +1,53 @@ +From d9a5ac9ef306eb5cc874f285185a15c303c50009 Mon Sep 17 00:00:00 2001 +From: Julia Lawall <julia@diku.dk> +Date: Fri, 13 May 2011 15:52:09 +0200 +Subject: x86, mce, AMD: Fix leaving freed data in a list + +From: Julia Lawall <julia@diku.dk> + +commit d9a5ac9ef306eb5cc874f285185a15c303c50009 upstream. + +b may be added to a list, but is not removed before being freed +in the case of an error. This is done in the corresponding +deallocation function, so the code here has been changed to +follow that. + +The semantic match that finds this problem is as follows: +(http://coccinelle.lip6.fr/) + +// <smpl> +@@ +expression E,E1,E2; +identifier l; +@@ + +*list_add(&E->l,E1); +...
when != E1 + when != list_del(&E->l) + when != list_del_init(&E->l) + when != E = E2 +*kfree(E);// </smpl> + +Signed-off-by: Julia Lawall <julia@diku.dk> +Cc: Borislav Petkov <borislav.petkov@amd.com> +Cc: Robert Richter <robert.richter@amd.com> +Cc: Yinghai Lu <yinghai@kernel.org> +Cc: Andreas Herrmann <andreas.herrmann3@amd.com> +Link: http://lkml.kernel.org/r/1305294731-12127-1-git-send-email-julia@diku.dk +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/kernel/cpu/mcheck/mce_amd.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -509,6 +509,7 @@ recurse: + out_free: + if (b) { + kobject_put(&b->kobj); ++ list_del(&b->miscj); + kfree(b); + } + return err; |