diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-09-15 10:04:15 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-09-15 10:04:15 +0200 |
commit | 9eb694335cc4ae8da2f8ce2c4107ba0e67a887ce (patch) | |
tree | 267cccbe313a37723096b712ac564cb2f93279d7 | |
parent | 7697ff111e24b00acdd2ecffa79ba75a5ba1cc69 (diff) | |
download | stable-queue-9eb694335cc4ae8da2f8ce2c4107ba0e67a887ce.tar.gz |
3.0 patches
15 files changed, 1045 insertions, 0 deletions
diff --git a/queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch b/queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch new file mode 100644 index 0000000000..ef878e2fff --- /dev/null +++ b/queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch @@ -0,0 +1,42 @@ +From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001 +From: John Stultz <john.stultz@linaro.org> +Date: Wed, 10 Aug 2011 10:26:09 -0700 +Subject: alarmtimers: Avoid possible denial of service with high freq periodic timers + +From: John Stultz <john.stultz@linaro.org> + +commit 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 upstream. + +Its possible to jam up the alarm timers by setting very small interval +timers, which will cause the alarmtimer subsystem to spend all of its time +firing and restarting timers. This can effectivly lock up a box. + +A deeper fix is needed, closely mimicking the hrtimer code, but for now +just cap the interval to 100us to avoid userland hanging the system. + +CC: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: John Stultz <john.stultz@linaro.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/time/alarmtimer.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itim + if (!rtcdev) + return -ENOTSUPP; + ++ /* ++ * XXX HACK! Currently we can DOS a system if the interval ++ * period on alarmtimers is too small. Cap the interval here ++ * to 100us and solve this properly in a future patch! 
-jstultz ++ */ ++ if ((new_setting->it_interval.tv_sec == 0) && ++ (new_setting->it_interval.tv_nsec < 100000)) ++ new_setting->it_interval.tv_nsec = 100000; ++ + if (old_setting) + alarm_timer_get(timr, old_setting); + diff --git a/queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch b/queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch new file mode 100644 index 0000000000..aaa61bc1b4 --- /dev/null +++ b/queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch @@ -0,0 +1,36 @@ +From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001 +From: John Stultz <john.stultz@linaro.org> +Date: Thu, 4 Aug 2011 07:25:35 -0700 +Subject: alarmtimers: Avoid possible null pointer traversal + +From: John Stultz <john.stultz@linaro.org> + +commit 971c90bfa2f0b4fe52d6d9002178d547706f1343 upstream. + +We don't check if old_setting is non null before assigning it, so +correct this. + +CC: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: John Stultz <john.stultz@linaro.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/time/alarmtimer.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itim + if (!rtcdev) + return -ENOTSUPP; + +- /* Save old values */ +- old_setting->it_interval = +- ktime_to_timespec(timr->it.alarmtimer.period); +- old_setting->it_value = +- ktime_to_timespec(timr->it.alarmtimer.node.expires); ++ if (old_setting) ++ alarm_timer_get(timr, old_setting); + + /* If the timer was already set, cancel it */ + alarm_cancel(&timr->it.alarmtimer); diff --git a/queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch b/queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch new file mode 100644 index 0000000000..0b207dcd88 --- /dev/null +++ b/queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch @@ -0,0 +1,30 @@ 
+From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001 +From: John Stultz <john.stultz@linaro.org> +Date: Thu, 4 Aug 2011 07:51:56 -0700 +Subject: alarmtimers: Memset itimerspec passed into alarm_timer_get + +From: John Stultz <john.stultz@linaro.org> + +commit ea7802f630d356acaf66b3c0b28c00a945fc35dc upstream. + +Following common_timer_get, zero out the itimerspec passed in. + +CC: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: John Stultz <john.stultz@linaro.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/time/alarmtimer.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_i + static void alarm_timer_get(struct k_itimer *timr, + struct itimerspec *cur_setting) + { ++ memset(cur_setting, 0, sizeof(struct itimerspec)); ++ + cur_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + cur_setting->it_value = diff --git a/queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch b/queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch new file mode 100644 index 0000000000..39b6c80fda --- /dev/null +++ b/queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch @@ -0,0 +1,267 @@ +From 778e277cb82411c9002ca28ccbd216c4d9eb9158 Mon Sep 17 00:00:00 2001 +From: Mika Westerberg <mika.westerberg@linux.intel.com> +Date: Thu, 18 Aug 2011 15:23:48 +0300 +Subject: mmc: core: prevent aggressive clock gating racing with ios updates + +From: Mika Westerberg <mika.westerberg@linux.intel.com> + +commit 778e277cb82411c9002ca28ccbd216c4d9eb9158 upstream. + +We have seen at least two different races when clock gating kicks in in a +middle of ios structure update. + +First one happens when ios->clock is changed outside of aggressive clock +gating framework, for example via mmc_set_clock(). The race might happen +when we run following code: + +mmc_set_ios(): + ... 
+ if (ios->clock > 0) + mmc_set_ungated(host); + +Now if gating kicks in right after the condition check we end up setting +host->clk_gated to false even though we have just gated the clock. Next +time a request is started we try to ungate and restore the clock in +mmc_host_clk_hold(). However since we have host->clk_gated set to false the +original clock is not restored. + +This eventually will cause the host controller to hang since its clock is +disabled while we are trying to issue a request. For example on Intel +Medfield platform we see: + +[ 13.818610] mmc2: Timeout waiting for hardware interrupt. +[ 13.818698] sdhci: =========== REGISTER DUMP (mmc2)=========== +[ 13.818753] sdhci: Sys addr: 0x00000000 | Version: 0x00008901 +[ 13.818804] sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 +[ 13.818853] sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 +[ 13.818903] sdhci: Present: 0x1fff0000 | Host ctl: 0x00000001 +[ 13.818951] sdhci: Power: 0x0000000d | Blk gap: 0x00000000 +[ 13.819000] sdhci: Wake-up: 0x00000000 | Clock: 0x00000000 +[ 13.819049] sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 +[ 13.819098] sdhci: Int enab: 0x00ff00c3 | Sig enab: 0x00ff00c3 +[ 13.819147] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 +[ 13.819196] sdhci: Caps: 0x6bee32b2 | Caps_1: 0x00000000 +[ 13.819245] sdhci: Cmd: 0x00000000 | Max curr: 0x00000000 +[ 13.819292] sdhci: Host ctl2: 0x00000000 +[ 13.819331] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000 +[ 13.819377] sdhci: =========================================== +[ 13.919605] mmc2: Reset 0x2 never completed. + +and it never recovers. + +Second race might happen while running mmc_power_off(): + +static void mmc_power_off(struct mmc_host *host) +{ + host->ios.clock = 0; + host->ios.vdd = 0; + +[ clock gating kicks in here ] + + /* + * Reset ocr mask to be the highest possible voltage supported for + * this mmc host. This value will be used at next power up. 
+ */ + host->ocr = 1 << (fls(host->ocr_avail) - 1); + + if (!mmc_host_is_spi(host)) { + host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; + host->ios.chip_select = MMC_CS_DONTCARE; + } + host->ios.power_mode = MMC_POWER_OFF; + host->ios.bus_width = MMC_BUS_WIDTH_1; + host->ios.timing = MMC_TIMING_LEGACY; + mmc_set_ios(host); +} + +If the clock gating worker kicks in while we are only partially updated the +ios structure the host controller gets incomplete ios and might not work as +supposed. Again on Intel Medfield platform we get: + +[ 4.185349] kernel BUG at drivers/mmc/host/sdhci.c:1155! +[ 4.185422] invalid opcode: 0000 [#1] PREEMPT SMP +[ 4.185509] Modules linked in: +[ 4.185565] +[ 4.185608] Pid: 4, comm: kworker/0:0 Not tainted 3.0.0+ #240 Intel Corporation Medfield/iCDKA +[ 4.185742] EIP: 0060:[<c136364e>] EFLAGS: 00010083 CPU: 0 +[ 4.185827] EIP is at sdhci_set_power+0x3e/0xd0 +[ 4.185891] EAX: f5ff98e0 EBX: f5ff98e0 ECX: 00000000 EDX: 00000001 +[ 4.185970] ESI: f5ff977c EDI: f5ff9904 EBP: f644fe98 ESP: f644fe94 +[ 4.186049] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 +[ 4.186125] Process kworker/0:0 (pid: 4, ti=f644e000 task=f644c0e0 task.ti=f644e000) +[ 4.186219] Stack: +[ 4.186257] f5ff98e0 f644feb0 c1365173 00000282 f5ff9460 f5ff96e0 f5ff96e0 f644feec +[ 4.186418] c1355bd8 f644c0e0 c1499c3d f5ff96e0 f644fed4 00000006 f5ff96e0 00000286 +[ 4.186579] f644fedc c107922b f644feec 00000286 f5ff9460 f5ff9700 f644ff10 c135839e +[ 4.186739] Call Trace: +[ 4.186802] [<c1365173>] sdhci_set_ios+0x1c3/0x340 +[ 4.186883] [<c1355bd8>] mmc_gate_clock+0x68/0x120 +[ 4.186963] [<c1499c3d>] ? _raw_spin_unlock_irqrestore+0x4d/0x60 +[ 4.187052] [<c107922b>] ? trace_hardirqs_on+0xb/0x10 +[ 4.187134] [<c135839e>] mmc_host_clk_gate_delayed+0xbe/0x130 +[ 4.187219] [<c105ec09>] ? process_one_work+0xf9/0x5b0 +[ 4.187300] [<c135841d>] mmc_host_clk_gate_work+0xd/0x10 +[ 4.187379] [<c105ec82>] process_one_work+0x172/0x5b0 +[ 4.187457] [<c105ec09>] ? 
process_one_work+0xf9/0x5b0 +[ 4.187538] [<c1358410>] ? mmc_host_clk_gate_delayed+0x130/0x130 +[ 4.187625] [<c105f3c8>] worker_thread+0x118/0x330 +[ 4.187700] [<c1496cee>] ? preempt_schedule+0x2e/0x50 +[ 4.187779] [<c105f2b0>] ? rescuer_thread+0x1f0/0x1f0 +[ 4.187857] [<c1062cf4>] kthread+0x74/0x80 +[ 4.187931] [<c1062c80>] ? __init_kthread_worker+0x60/0x60 +[ 4.188015] [<c149acfa>] kernel_thread_helper+0x6/0xd +[ 4.188079] Code: 81 fa 00 00 04 00 0f 84 a7 00 00 00 7f 21 81 fa 80 00 00 00 0f 84 92 00 00 00 81 fa 00 00 0 +[ 4.188780] EIP: [<c136364e>] sdhci_set_power+0x3e/0xd0 SS:ESP 0068:f644fe94 +[ 4.188898] ---[ end trace a7b23eecc71777e4 ]--- + +This BUG() comes from the fact that ios.power_mode was still in previous +value (MMC_POWER_ON) and ios.vdd was set to zero. + +We prevent these by inhibiting the clock gating while we update the ios +structure. + +Both problems can be reproduced by simply running the device in a reboot +loop. + +Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> +Reviewed-by: Linus Walleij <linus.walleij@linaro.org> +Tested-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/mmc/core/core.c | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/core/core.c ++++ b/drivers/mmc/core/core.c +@@ -634,15 +634,17 @@ static inline void mmc_set_ios(struct mm + */ + void mmc_set_chip_select(struct mmc_host *host, int mode) + { ++ mmc_host_clk_hold(host); + host->ios.chip_select = mode; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } + + /* + * Sets the host clock to the highest possible frequency that + * is below "hz". 
+ */ +-void mmc_set_clock(struct mmc_host *host, unsigned int hz) ++static void __mmc_set_clock(struct mmc_host *host, unsigned int hz) + { + WARN_ON(hz < host->f_min); + +@@ -653,6 +655,13 @@ void mmc_set_clock(struct mmc_host *host + mmc_set_ios(host); + } + ++void mmc_set_clock(struct mmc_host *host, unsigned int hz) ++{ ++ mmc_host_clk_hold(host); ++ __mmc_set_clock(host, hz); ++ mmc_host_clk_release(host); ++} ++ + #ifdef CONFIG_MMC_CLKGATE + /* + * This gates the clock by setting it to 0 Hz. +@@ -685,7 +694,7 @@ void mmc_ungate_clock(struct mmc_host *h + if (host->clk_old) { + BUG_ON(host->ios.clock); + /* This call will also set host->clk_gated to false */ +- mmc_set_clock(host, host->clk_old); ++ __mmc_set_clock(host, host->clk_old); + } + } + +@@ -713,8 +722,10 @@ void mmc_set_ungated(struct mmc_host *ho + */ + void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) + { ++ mmc_host_clk_hold(host); + host->ios.bus_mode = mode; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } + + /* +@@ -722,8 +733,10 @@ void mmc_set_bus_mode(struct mmc_host *h + */ + void mmc_set_bus_width(struct mmc_host *host, unsigned int width) + { ++ mmc_host_clk_hold(host); + host->ios.bus_width = width; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } + + /** +@@ -921,8 +934,10 @@ u32 mmc_select_voltage(struct mmc_host * + + ocr &= 3 << bit; + ++ mmc_host_clk_hold(host); + host->ios.vdd = bit; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } else { + pr_warning("%s: host doesn't support card's voltages\n", + mmc_hostname(host)); +@@ -969,8 +984,10 @@ int mmc_set_signal_voltage(struct mmc_ho + */ + void mmc_set_timing(struct mmc_host *host, unsigned int timing) + { ++ mmc_host_clk_hold(host); + host->ios.timing = timing; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } + + /* +@@ -978,8 +995,10 @@ void mmc_set_timing(struct mmc_host *hos + */ + void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type) + { ++ mmc_host_clk_hold(host); + 
host->ios.drv_type = drv_type; + mmc_set_ios(host); ++ mmc_host_clk_release(host); + } + + /* +@@ -997,6 +1016,8 @@ static void mmc_power_up(struct mmc_host + { + int bit; + ++ mmc_host_clk_hold(host); ++ + /* If ocr is set, we use it */ + if (host->ocr) + bit = ffs(host->ocr) - 1; +@@ -1032,10 +1053,14 @@ static void mmc_power_up(struct mmc_host + * time required to reach a stable voltage. + */ + mmc_delay(10); ++ ++ mmc_host_clk_release(host); + } + + static void mmc_power_off(struct mmc_host *host) + { ++ mmc_host_clk_hold(host); ++ + host->ios.clock = 0; + host->ios.vdd = 0; + +@@ -1053,6 +1078,8 @@ static void mmc_power_off(struct mmc_hos + host->ios.bus_width = MMC_BUS_WIDTH_1; + host->ios.timing = MMC_TIMING_LEGACY; + mmc_set_ios(host); ++ ++ mmc_host_clk_release(host); + } + + /* diff --git a/queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch b/queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch new file mode 100644 index 0000000000..e4bade3512 --- /dev/null +++ b/queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch @@ -0,0 +1,37 @@ +From 50a50f9248497484c678631a9c1a719f1aaeab79 Mon Sep 17 00:00:00 2001 +From: Mika Westerberg <mika.westerberg@linux.intel.com> +Date: Thu, 18 Aug 2011 15:23:49 +0300 +Subject: mmc: core: use non-reentrant workqueue for clock gating + +From: Mika Westerberg <mika.westerberg@linux.intel.com> + +commit 50a50f9248497484c678631a9c1a719f1aaeab79 upstream. + +The default multithread workqueue can cause the same work to be executed +concurrently on a different CPUs. This isn't really suitable for clock +gating as it might already gated the clock and gating it twice results both +host->clk_old and host->ios.clock to be set to 0. + +To prevent this from happening we use system_nrt_wq instead. 
+ +Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> +Reviewed-by: Linus Walleij <linus.walleij@linaro.org> +Tested-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/mmc/core/host.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mmc/core/host.c ++++ b/drivers/mmc/core/host.c +@@ -179,7 +179,7 @@ void mmc_host_clk_release(struct mmc_hos + host->clk_requests--; + if (mmc_host_may_gate_card(host->card) && + !host->clk_requests) +- schedule_work(&host->clk_gate_work); ++ queue_work(system_nrt_wq, &host->clk_gate_work); + spin_unlock_irqrestore(&host->clk_lock, flags); + } + diff --git a/queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch b/queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch new file mode 100644 index 0000000000..b92ec47060 --- /dev/null +++ b/queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch @@ -0,0 +1,121 @@ +From 08c14071fda4e69abb9d5b1566651cd092b158d3 Mon Sep 17 00:00:00 2001 +From: Mika Westerberg <mika.westerberg@linux.intel.com> +Date: Thu, 18 Aug 2011 15:23:47 +0300 +Subject: mmc: rename mmc_host_clk_{ungate|gate} to mmc_host_clk_{hold|release} + +From: Mika Westerberg <mika.westerberg@linux.intel.com> + +commit 08c14071fda4e69abb9d5b1566651cd092b158d3 upstream. + +As per suggestion by Linus Walleij: + + > If you think the names of the functions are confusing then + > you may rename them, say like this: + > + > mmc_host_clk_ungate() -> mmc_host_clk_hold() + > mmc_host_clk_gate() -> mmc_host_clk_release() + > + > Which would make the usecases more clear + +(This is CC'd to stable@ because the next two patches, which fix +observable races, depend on it.) 
+ +Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> +Reviewed-by: Linus Walleij <linus.walleij@linaro.org> +Signed-off-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/mmc/core/core.c | 4 ++-- + drivers/mmc/core/host.c | 10 +++++----- + drivers/mmc/core/host.h | 8 ++++---- + 3 files changed, 11 insertions(+), 11 deletions(-) + +--- a/drivers/mmc/core/core.c ++++ b/drivers/mmc/core/core.c +@@ -132,7 +132,7 @@ void mmc_request_done(struct mmc_host *h + if (mrq->done) + mrq->done(mrq); + +- mmc_host_clk_gate(host); ++ mmc_host_clk_release(host); + } + } + +@@ -191,7 +191,7 @@ mmc_start_request(struct mmc_host *host, + mrq->stop->mrq = mrq; + } + } +- mmc_host_clk_ungate(host); ++ mmc_host_clk_hold(host); + led_trigger_event(host->led, LED_FULL); + host->ops->request(host, mrq); + } +--- a/drivers/mmc/core/host.c ++++ b/drivers/mmc/core/host.c +@@ -119,14 +119,14 @@ static void mmc_host_clk_gate_work(struc + } + + /** +- * mmc_host_clk_ungate - ungate hardware MCI clocks ++ * mmc_host_clk_hold - ungate hardware MCI clocks + * @host: host to ungate. + * + * Makes sure the host ios.clock is restored to a non-zero value + * past this call. Increase clock reference count and ungate clock + * if we're the first user. + */ +-void mmc_host_clk_ungate(struct mmc_host *host) ++void mmc_host_clk_hold(struct mmc_host *host) + { + unsigned long flags; + +@@ -164,14 +164,14 @@ static bool mmc_host_may_gate_card(struc + } + + /** +- * mmc_host_clk_gate - gate off hardware MCI clocks ++ * mmc_host_clk_release - gate off hardware MCI clocks + * @host: host to gate. + * + * Calls the host driver with ios.clock set to zero as often as possible + * in order to gate off hardware MCI clocks. Decrease clock reference + * count and schedule disabling of clock. 
+ */ +-void mmc_host_clk_gate(struct mmc_host *host) ++void mmc_host_clk_release(struct mmc_host *host) + { + unsigned long flags; + +@@ -231,7 +231,7 @@ static inline void mmc_host_clk_exit(str + if (cancel_work_sync(&host->clk_gate_work)) + mmc_host_clk_gate_delayed(host); + if (host->clk_gated) +- mmc_host_clk_ungate(host); ++ mmc_host_clk_hold(host); + /* There should be only one user now */ + WARN_ON(host->clk_requests > 1); + } +--- a/drivers/mmc/core/host.h ++++ b/drivers/mmc/core/host.h +@@ -16,16 +16,16 @@ int mmc_register_host_class(void); + void mmc_unregister_host_class(void); + + #ifdef CONFIG_MMC_CLKGATE +-void mmc_host_clk_ungate(struct mmc_host *host); +-void mmc_host_clk_gate(struct mmc_host *host); ++void mmc_host_clk_hold(struct mmc_host *host); ++void mmc_host_clk_release(struct mmc_host *host); + unsigned int mmc_host_clk_rate(struct mmc_host *host); + + #else +-static inline void mmc_host_clk_ungate(struct mmc_host *host) ++static inline void mmc_host_clk_hold(struct mmc_host *host) + { + } + +-static inline void mmc_host_clk_gate(struct mmc_host *host) ++static inline void mmc_host_clk_release(struct mmc_host *host) + { + } + diff --git a/queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch b/queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch new file mode 100644 index 0000000000..bc6290efd0 --- /dev/null +++ b/queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch @@ -0,0 +1,41 @@ +From 49bb1e619568ec84785ceb366f07db2a6f0b64cc Mon Sep 17 00:00:00 2001 +From: Girish K S <girish.shivananjappa@linaro.org> +Date: Fri, 26 Aug 2011 14:58:18 +0530 +Subject: mmc: sdhci-s3c: Fix mmc card I/O problem + +From: Girish K S <girish.shivananjappa@linaro.org> + +commit 49bb1e619568ec84785ceb366f07db2a6f0b64cc upstream. + +This patch fixes the problem in sdhci-s3c host driver for Samsung Soc's. +During the card identification stage the mmc core driver enumerates for +the best bus width in combination with the highest available data rate. 
+It starts enumerating from the highest bus width (8) to lowest width (1). + +In case of few MMC cards the 4-bit bus enumeration fails and tries +the 1-bit bus enumeration. When switched to 1-bit bus mode the host driver +has to clear the previous bus width setting and apply the new setting. + +The current patch will clear the previous bus mode and apply the new +mode setting. + +Signed-off-by: Girish K S <girish.shivananjappa@linaro.org> +Acked-by: Jaehoon Chung <jh80.chung@samsung.com> +Signed-off-by: Chris Ball <cjb@laptop.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/mmc/host/sdhci-s3c.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/mmc/host/sdhci-s3c.c ++++ b/drivers/mmc/host/sdhci-s3c.c +@@ -301,6 +301,8 @@ static int sdhci_s3c_platform_8bit_width + ctrl &= ~SDHCI_CTRL_8BITBUS; + break; + default: ++ ctrl &= ~SDHCI_CTRL_4BITBUS; ++ ctrl &= ~SDHCI_CTRL_8BITBUS; + break; + } + diff --git a/queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch b/queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch new file mode 100644 index 0000000000..4e53a7bd63 --- /dev/null +++ b/queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch @@ -0,0 +1,46 @@ +From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001 +From: John Stultz <john.stultz@linaro.org> +Date: Fri, 22 Jul 2011 09:12:51 +0000 +Subject: rtc: Fix RTC PIE frequency limit + +From: John Stultz <john.stultz@linaro.org> + +commit 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 upstream. + +Thomas earlier submitted a fix to limit the RTC PIE freq, but +picked 5000Hz out of the air. Willy noticed that we should +instead use the 8192Hz max from the rtc man documentation. 
+ +Cc: Willy Tarreau <w@1wt.eu> +Cc: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: John Stultz <john.stultz@linaro.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/rtc/interface.c | 2 +- + include/linux/rtc.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/rtc/interface.c ++++ b/drivers/rtc/interface.c +@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device * + int err = 0; + unsigned long flags; + +- if (freq <= 0 || freq > 5000) ++ if (freq <= 0 || freq > RTC_MAX_FREQ) + return -EINVAL; + retry: + spin_lock_irqsave(&rtc->irq_task_lock, flags); +--- a/include/linux/rtc.h ++++ b/include/linux/rtc.h +@@ -97,6 +97,9 @@ struct rtc_pll_info { + #define RTC_AF 0x20 /* Alarm interrupt */ + #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + ++ ++#define RTC_MAX_FREQ 8192 ++ + #ifdef __KERNEL__ + + #include <linux/types.h> diff --git a/queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch b/queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch new file mode 100644 index 0000000000..ee934d0a8a --- /dev/null +++ b/queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch @@ -0,0 +1,54 @@ +From feff8fa0075bdfd43c841e9d689ed81adda988d6 Mon Sep 17 00:00:00 2001 +From: WANG Cong <amwang@redhat.com> +Date: Thu, 18 Aug 2011 20:36:57 +0800 +Subject: sched: Fix a memory leak in __sdt_free() + +From: WANG Cong <amwang@redhat.com> + +commit feff8fa0075bdfd43c841e9d689ed81adda988d6 upstream. + +This patch fixes the following memory leak: + +unreferenced object 0xffff880107266800 (size 512): + comm "sched-powersave", pid 3718, jiffies 4323097853 (age 27495.450s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<ffffffff81133940>] create_object+0x187/0x28b + [<ffffffff814ac103>] kmemleak_alloc+0x73/0x98 + [<ffffffff811232ba>] __kmalloc_node+0x104/0x159 + [<ffffffff81044b98>] kzalloc_node.clone.97+0x15/0x17 + [<ffffffff8104cb90>] build_sched_domains+0xb7/0x7f3 + [<ffffffff8104d4df>] partition_sched_domains+0x1db/0x24a + [<ffffffff8109ee4a>] do_rebuild_sched_domains+0x3b/0x47 + [<ffffffff810a00c7>] rebuild_sched_domains+0x10/0x12 + [<ffffffff8104d5ba>] sched_power_savings_store+0x6c/0x7b + [<ffffffff8104d5df>] sched_mc_power_savings_store+0x16/0x18 + [<ffffffff8131322c>] sysdev_class_store+0x20/0x22 + [<ffffffff81193876>] sysfs_write_file+0x108/0x144 + [<ffffffff81135b10>] vfs_write+0xaf/0x102 + [<ffffffff81135d23>] sys_write+0x4d/0x74 + [<ffffffff814c8a42>] system_call_fastpath+0x16/0x1b + [<ffffffffffffffff>] 0xffffffffffffffff + +Signed-off-by: WANG Cong <amwang@redhat.com> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/r/1313671017-4112-1-git-send-email-amwang@redhat.com +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/sched.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7423,6 +7423,7 @@ static void __sdt_free(const struct cpum + struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); + if (sd && (sd->flags & SD_OVERLAP)) + free_sched_groups(sd->groups, 0); ++ kfree(*per_cpu_ptr(sdd->sd, j)); + kfree(*per_cpu_ptr(sdd->sg, j)); + kfree(*per_cpu_ptr(sdd->sgp, j)); + } diff --git a/queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch b/queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch new file mode 100644 index 0000000000..fc88bcc54c --- /dev/null +++ b/queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch @@ -0,0 +1,77 @@ +From 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 22 Jun 2011 
19:47:01 +0200 +Subject: sched: Move blk_schedule_flush_plug() out of __schedule() + +From: Thomas Gleixner <tglx@linutronix.de> + +commit 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c upstream. + +There is no real reason to run blk_schedule_flush_plug() with +interrupts and preemption disabled. + +Move it into schedule() and call it when the task is going voluntarily +to sleep. There might be false positives when the task is woken +between that call and actually scheduling, but that's not really +different from being woken immediately after switching away. + +This fixes a deadlock in the scheduler where the +blk_schedule_flush_plug() callchain enables interrupts and thereby +allows a wakeup to happen of the task that's going to sleep. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Cc: Tejun Heo <tj@kernel.org> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/n/tip-dwfxtra7yg1b5r65m32ywtct@git.kernel.org +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/sched.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -4285,16 +4285,6 @@ need_resched: + if (to_wakeup) + try_to_wake_up_local(to_wakeup); + } +- +- /* +- * If we are going to sleep and we have plugged IO +- * queued, make sure to submit it to avoid deadlocks. +- */ +- if (blk_needs_flush_plug(prev)) { +- raw_spin_unlock(&rq->lock); +- blk_schedule_flush_plug(prev); +- raw_spin_lock(&rq->lock); +- } + } + switch_count = &prev->nvcsw; + } +@@ -4333,8 +4323,23 @@ need_resched: + goto need_resched; + } + ++static inline void sched_submit_work(struct task_struct *tsk) ++{ ++ if (!tsk->state) ++ return; ++ /* ++ * If we are going to sleep and we have plugged IO queued, ++ * make sure to submit it to avoid deadlocks. 
++ */ ++ if (blk_needs_flush_plug(tsk)) ++ blk_schedule_flush_plug(tsk); ++} ++ + asmlinkage void schedule(void) + { ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); + __schedule(); + } + EXPORT_SYMBOL(schedule); diff --git a/queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch b/queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch new file mode 100644 index 0000000000..32abdb6155 --- /dev/null +++ b/queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch @@ -0,0 +1,85 @@ +From c259e01a1ec90063042f758e409cd26b2a0963c8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 22 Jun 2011 19:47:00 +0200 +Subject: sched: Separate the scheduler entry for preemption + +From: Thomas Gleixner <tglx@linutronix.de> + +commit c259e01a1ec90063042f758e409cd26b2a0963c8 upstream. + +Block-IO and workqueues call into notifier functions from the +scheduler core code with interrupts and preemption disabled. These +calls should be made before entering the scheduler core. + +To simplify this, separate the scheduler core code into +__schedule(). __schedule() is directly called from the places which +set PREEMPT_ACTIVE and from schedule(). This allows us to add the work +checks into schedule(), so they are only called when a task voluntary +goes to sleep. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Cc: Tejun Heo <tj@kernel.org> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/20110622174918.813258321@linutronix.de +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/sched.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -4242,9 +4242,9 @@ pick_next_task(struct rq *rq) + } + + /* +- * schedule() is the main scheduler function. 
++ * __schedule() is the main scheduler function. + */ +-asmlinkage void __sched schedule(void) ++static void __sched __schedule(void) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -4332,6 +4332,11 @@ need_resched: + if (need_resched()) + goto need_resched; + } ++ ++asmlinkage void schedule(void) ++{ ++ __schedule(); ++} + EXPORT_SYMBOL(schedule); + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +@@ -4405,7 +4410,7 @@ asmlinkage void __sched notrace preempt_ + + do { + add_preempt_count_notrace(PREEMPT_ACTIVE); +- schedule(); ++ __schedule(); + sub_preempt_count_notrace(PREEMPT_ACTIVE); + + /* +@@ -4433,7 +4438,7 @@ asmlinkage void __sched preempt_schedule + do { + add_preempt_count(PREEMPT_ACTIVE); + local_irq_enable(); +- schedule(); ++ __schedule(); + local_irq_disable(); + sub_preempt_count(PREEMPT_ACTIVE); + +@@ -5558,7 +5563,7 @@ static inline int should_resched(void) + static void __cond_resched(void) + { + add_preempt_count(PREEMPT_ACTIVE); +- schedule(); ++ __schedule(); + sub_preempt_count(PREEMPT_ACTIVE); + } + diff --git a/queue-3.0/series b/queue-3.0/series index e72eb760e4..98e3de6523 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -54,3 +54,17 @@ mm-page-allocator-initialise-zlc-for-first-zone-eligible-for-zone_reclaim.patch mm-page-allocator-reconsider-zones-for-allocation-after-direct-reclaim.patch igb-fix-wol-on-second-port-of-i350-device.patch mxc-iomux-v3-correct-no_pad_ctrl-definition.patch +alarmtimers-avoid-possible-null-pointer-traversal.patch +alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch +alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch +rtc-fix-rtc-pie-frequency-limit.patch +sched-separate-the-scheduler-entry-for-preemption.patch +sched-move-blk_schedule_flush_plug-out-of-__schedule.patch +sched-fix-a-memory-leak-in-__sdt_free.patch +x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch 
+mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch +mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch +mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch +mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch +xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch +xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch diff --git a/queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch b/queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch new file mode 100644 index 0000000000..55f975e3ed --- /dev/null +++ b/queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch @@ -0,0 +1,63 @@ +From 20afc60f892d285fde179ead4b24e6a7938c2f1b Mon Sep 17 00:00:00 2001 +From: Andrey Vagin <avagin@openvz.org> +Date: Tue, 30 Aug 2011 12:32:36 +0400 +Subject: x86, perf: Check that current->mm is alive before getting user callchain + +From: Andrey Vagin <avagin@openvz.org> + +commit 20afc60f892d285fde179ead4b24e6a7938c2f1b upstream. + +An event may occur when an mm is already released. + +I added an event in dequeue_entity() and caught a panic with +the following backtrace: + +[ 434.421110] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +[ 434.421258] IP: [<ffffffff810464ac>] __get_user_pages_fast+0x9c/0x120 +... +[ 434.421258] Call Trace: +[ 434.421258] [<ffffffff8101ae81>] copy_from_user_nmi+0x51/0xf0 +[ 434.421258] [<ffffffff8109a0d5>] ? sched_clock_local+0x25/0x90 +[ 434.421258] [<ffffffff8101b048>] perf_callchain_user+0x128/0x170 +[ 434.421258] [<ffffffff811154cd>] ? __perf_event_header__init_id+0xed/0x100 +[ 434.421258] [<ffffffff81116690>] perf_prepare_sample+0x200/0x280 +[ 434.421258] [<ffffffff81118da8>] __perf_event_overflow+0x1b8/0x290 +[ 434.421258] [<ffffffff81065240>] ? tg_shares_up+0x0/0x670 +[ 434.421258] [<ffffffff8104fe1a>] ? 
walk_tg_tree+0x6a/0xb0 +[ 434.421258] [<ffffffff81118f44>] perf_swevent_overflow+0xc4/0xf0 +[ 434.421258] [<ffffffff81119150>] do_perf_sw_event+0x1e0/0x250 +[ 434.421258] [<ffffffff81119204>] perf_tp_event+0x44/0x70 +[ 434.421258] [<ffffffff8105701f>] ftrace_profile_sched_block+0xdf/0x110 +[ 434.421258] [<ffffffff8106121d>] dequeue_entity+0x2ad/0x2d0 +[ 434.421258] [<ffffffff810614ec>] dequeue_task_fair+0x1c/0x60 +[ 434.421258] [<ffffffff8105818a>] dequeue_task+0x9a/0xb0 +[ 434.421258] [<ffffffff810581e2>] deactivate_task+0x42/0xe0 +[ 434.421258] [<ffffffff814bc019>] thread_return+0x191/0x808 +[ 434.421258] [<ffffffff81098a44>] ? switch_task_namespaces+0x24/0x60 +[ 434.421258] [<ffffffff8106f4c4>] do_exit+0x464/0x910 +[ 434.421258] [<ffffffff8106f9c8>] do_group_exit+0x58/0xd0 +[ 434.421258] [<ffffffff8106fa57>] sys_exit_group+0x17/0x20 +[ 434.421258] [<ffffffff8100b202>] system_call_fastpath+0x16/0x1b + +Signed-off-by: Andrey Vagin <avagin@openvz.org> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/r/1314693156-24131-1-git-send-email-avagin@openvz.org +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/kernel/cpu/perf_event.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kernel/cpu/perf_event.c ++++ b/arch/x86/kernel/cpu/perf_event.c +@@ -1856,6 +1856,9 @@ perf_callchain_user(struct perf_callchai + + perf_callchain_store(entry, regs->ip); + ++ if (!current->mm) ++ return; ++ + if (perf_callchain_user32(regs, entry)) + return; + diff --git a/queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch b/queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch new file mode 100644 index 0000000000..83cf0472ec --- /dev/null +++ b/queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch @@ -0,0 +1,75 @@ +From d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 Mon Sep 17 00:00:00 2001 +From: David Vrabel 
<david.vrabel@citrix.com> +Date: Fri, 19 Aug 2011 15:57:16 +0100 +Subject: xen: use maximum reservation to limit amount of usable RAM + +From: David Vrabel <david.vrabel@citrix.com> + +commit d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 upstream. + +Use the domain's maximum reservation to limit the amount of extra RAM +for the memory balloon. This reduces the size of the page tables and +the amount of reserved low memory (which defaults to about 1/32 of the +total RAM). + +On a system with 8 GiB of RAM with the domain limited to 1 GiB the +kernel reports: + +Before: + +Memory: 549740k/11132224k available + +After: + +Memory: 627792k/4472000k available + +An increase of about 76 MiB (~1.5% of the unused 7 GiB). The reserved +low memory is also reduced from 253 MiB to 32 MiB. The total +additional usable RAM is 329 MiB. + +For dom0, this requires a patch to Xen ('x86: use 'dom0_mem' to limit +the number of pages for dom0') (c/s 23790) + +Signed-off-by: David Vrabel <david.vrabel@citrix.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/xen/setup.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -185,6 +185,19 @@ static unsigned long __init xen_set_iden + PFN_UP(start_pci), PFN_DOWN(last)); + return identity; + } ++ ++static unsigned long __init xen_get_max_pages(void) ++{ ++ unsigned long max_pages = MAX_DOMAIN_PAGES; ++ domid_t domid = DOMID_SELF; ++ int ret; ++ ++ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); ++ if (ret > 0) ++ max_pages = ret; ++ return min(max_pages, MAX_DOMAIN_PAGES); ++} ++ + /** + * machine_specific_memory_setup - Hook for machine specific memory setup. 
+ **/ +@@ -293,6 +306,12 @@ char * __init xen_memory_setup(void) + + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + ++ extra_limit = xen_get_max_pages(); ++ if (extra_limit >= max_pfn) ++ extra_pages = extra_limit - max_pfn; ++ else ++ extra_pages = 0; ++ + extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); + + /* diff --git a/queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch b/queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch new file mode 100644 index 0000000000..499fd898b3 --- /dev/null +++ b/queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch @@ -0,0 +1,57 @@ +From d198d499148a0c64a41b3aba9e7dd43772832b91 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov <imammedo@redhat.com> +Date: Thu, 1 Sep 2011 13:46:55 +0200 +Subject: xen: x86_32: do not enable iterrupts when returning from exception in interrupt context + +From: Igor Mammedov <imammedo@redhat.com> + +commit d198d499148a0c64a41b3aba9e7dd43772832b91 upstream. + +If a vmalloc page fault happens inside an interrupt handler with interrupts +disabled, then on the exit path from the exception handler, when there are no +pending interrupts, the following code (arch/x86/xen/xen-asm_32.S:112): + + cmpw $0x0001, XEN_vcpu_info_pending(%eax) + sete XEN_vcpu_info_mask(%eax) + +will enable interrupts even if they have been previously disabled according to +eflags from the bounce frame (arch/x86/xen/xen-asm_32.S:99) + + testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) + setz XEN_vcpu_info_mask(%eax) + +The solution is to set XEN_vcpu_info_mask only when it should be set +according to + cmpw $0x0001, XEN_vcpu_info_pending(%eax) +but not to clear it if there aren't any pending events. 
+ +Reproducer for bug is attached to RHBZ 707552 + +Signed-off-by: Igor Mammedov <imammedo@redhat.com> +Acked-by: Jeremy Fitzhardinge <jeremy@goop.org> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/xen/xen-asm_32.S | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/x86/xen/xen-asm_32.S ++++ b/arch/x86/xen/xen-asm_32.S +@@ -113,11 +113,13 @@ xen_iret_start_crit: + + /* + * If there's something pending, mask events again so we can +- * jump back into xen_hypervisor_callback ++ * jump back into xen_hypervisor_callback. Otherwise do not ++ * touch XEN_vcpu_info_mask. + */ +- sete XEN_vcpu_info_mask(%eax) ++ jne 1f ++ movb $1, XEN_vcpu_info_mask(%eax) + +- popl %eax ++1: popl %eax + + /* + * From this point on the registers are restored and the stack |