diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-19 09:58:51 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-19 09:58:51 -0700 |
commit | 947bd36fdaa2b980949a83579f04076b7e90dc6a (patch) | |
tree | 4c93486d66e5c0ff134f2536e2af87075d41e491 | |
parent | 18651efa7d309f1cd2c1ed1a305a654fc645b5a3 (diff) | |
download | stable-queue-947bd36fdaa2b980949a83579f04076b7e90dc6a.tar.gz |
3.0 patches
7 files changed, 563 insertions, 0 deletions
diff --git a/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch b/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch new file mode 100644 index 0000000000..3c53bf3500 --- /dev/null +++ b/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch @@ -0,0 +1,179 @@ +From 55a673990ec04cf63005318bcf08c2b0046e5778 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust <Trond.Myklebust@netapp.com> +Date: Tue, 2 Aug 2011 14:46:29 -0400 +Subject: NFSv4.1: Fix the callback 'highest_used_slotid' behaviour + +From: Trond Myklebust <Trond.Myklebust@netapp.com> + +commit 55a673990ec04cf63005318bcf08c2b0046e5778 upstream. + +Currently, there is no guarantee that we will call nfs4_cb_take_slot() even +though nfs4_callback_compound() will consistently call +nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field. +The result is that we can trigger the BUG_ON() upon the next call to +nfs4_cb_take_slot(). + +This patch fixes the above problem by using the slot id that was taken in +the CB_SEQUENCE operation as a flag for whether or not we need to call +nfs4_cb_free_slot(). +It also fixes an atomicity problem: we need to set tbl->highest_used_slotid +atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up +racing with the various tests in nfs4_begin_drain_session(). 
+ +Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/nfs/callback.h | 2 +- + fs/nfs/callback_proc.c | 20 ++++++++++++++------ + fs/nfs/callback_xdr.c | 24 +++++++----------------- + 3 files changed, 22 insertions(+), 24 deletions(-) + +--- a/fs/nfs/callback.h ++++ b/fs/nfs/callback.h +@@ -38,6 +38,7 @@ enum nfs4_callback_opnum { + struct cb_process_state { + __be32 drc_status; + struct nfs_client *clp; ++ int slotid; + }; + + struct cb_compound_hdr_arg { +@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutreca + void *dummy, struct cb_process_state *cps); + + extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); +-extern void nfs4_cb_take_slot(struct nfs_client *clp); + + struct cb_devicenotifyitem { + uint32_t cbd_notify_type; +--- a/fs/nfs/callback_proc.c ++++ b/fs/nfs/callback_proc.c +@@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *t + /* Normal */ + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { + slot->seq_nr++; +- return htonl(NFS4_OK); ++ goto out_ok; + } + + /* Replay */ +@@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *t + /* Wraparound */ + if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { + slot->seq_nr = 1; +- return htonl(NFS4_OK); ++ goto out_ok; + } + + /* Misordered request */ + return htonl(NFS4ERR_SEQ_MISORDERED); ++out_ok: ++ tbl->highest_used_slotid = args->csa_slotid; ++ return htonl(NFS4_OK); + } + + /* +@@ -418,26 +421,32 @@ __be32 nfs4_callback_sequence(struct cb_ + struct cb_sequenceres *res, + struct cb_process_state *cps) + { ++ struct nfs4_slot_table *tbl; + struct nfs_client *clp; + int i; + __be32 status = htonl(NFS4ERR_BADSESSION); + +- cps->clp = NULL; +- + clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); + if (clp == NULL) + goto out; + ++ tbl = &clp->cl_session->bc_slot_table; ++ ++ spin_lock(&tbl->slot_tbl_lock); + /* state manager is resetting the session */ + if 
(test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { +- status = NFS4ERR_DELAY; ++ spin_unlock(&tbl->slot_tbl_lock); ++ status = htonl(NFS4ERR_DELAY); + goto out; + } + + status = validate_seqid(&clp->cl_session->bc_slot_table, args); ++ spin_unlock(&tbl->slot_tbl_lock); + if (status) + goto out; + ++ cps->slotid = args->csa_slotid; ++ + /* + * Check for pending referring calls. If a match is found, a + * related callback was received before the response to the original +@@ -454,7 +463,6 @@ __be32 nfs4_callback_sequence(struct cb_ + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; +- nfs4_cb_take_slot(clp); + + out: + cps->clp = clp; /* put in nfs4_callback_compound */ +--- a/fs/nfs/callback_xdr.c ++++ b/fs/nfs/callback_xdr.c +@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(stru + * Let the state manager know callback processing done. + * A single slot, so highest used slotid is either 0 or -1 + */ +- tbl->highest_used_slotid--; ++ tbl->highest_used_slotid = -1; + nfs4_check_drain_bc_complete(session); + spin_unlock(&tbl->slot_tbl_lock); + } + +-static void nfs4_cb_free_slot(struct nfs_client *clp) ++static void nfs4_cb_free_slot(struct cb_process_state *cps) + { +- if (clp && clp->cl_session) +- nfs4_callback_free_slot(clp->cl_session); +-} +- +-/* A single slot, so highest used slotid is either 0 or -1 */ +-void nfs4_cb_take_slot(struct nfs_client *clp) +-{ +- struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; +- +- spin_lock(&tbl->slot_tbl_lock); +- tbl->highest_used_slotid++; +- BUG_ON(tbl->highest_used_slotid != 0); +- spin_unlock(&tbl->slot_tbl_lock); ++ if (cps->slotid != -1) ++ nfs4_callback_free_slot(cps->clp->cl_session); + } + + #else /* CONFIG_NFS_V4_1 */ +@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned in + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); + } + +-static void nfs4_cb_free_slot(struct 
nfs_client *clp) ++static void nfs4_cb_free_slot(struct cb_process_state *cps) + { + } + #endif /* CONFIG_NFS_V4_1 */ +@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(str + struct cb_process_state cps = { + .drc_status = 0, + .clp = NULL, ++ .slotid = -1, + }; + unsigned int nops = 0; + +@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(str + + *hdr_res.status = status; + *hdr_res.nops = htonl(nops); +- nfs4_cb_free_slot(cps.clp); ++ nfs4_cb_free_slot(&cps); + nfs_put_client(cps.clp); + dprintk("%s: done, status = %u\n", __func__, ntohl(status)); + return rpc_success; diff --git a/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch b/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch new file mode 100644 index 0000000000..48806c3dfd --- /dev/null +++ b/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch @@ -0,0 +1,44 @@ +From 910ac68a2b80c7de95bc8488734067b1bb15d583 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust <Trond.Myklebust@netapp.com> +Date: Tue, 2 Aug 2011 14:46:52 -0400 +Subject: NFSv4.1: Return NFS4ERR_BADSESSION to callbacks during + session resets + +From: Trond Myklebust <Trond.Myklebust@netapp.com> + +commit 910ac68a2b80c7de95bc8488734067b1bb15d583 upstream. + +If the client is in the process of resetting the session when it receives +a callback, then returning NFS4ERR_DELAY may cause a deadlock with the +DESTROY_SESSION call. + +Basically, if the client returns NFS4ERR_DELAY in response to the +CB_SEQUENCE call, then the server is entitled to believe that the +client is busy because it is already processing that call. In that +case, the server is perfectly entitled to respond with a +NFS4ERR_BACK_CHAN_BUSY to any DESTROY_SESSION call. + +Fix this by having the client reply with a NFS4ERR_BADSESSION in +response to the callback if it is resetting the session. 
+ +Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/nfs/callback_proc.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/nfs/callback_proc.c ++++ b/fs/nfs/callback_proc.c +@@ -437,6 +437,11 @@ __be32 nfs4_callback_sequence(struct cb_ + if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { + spin_unlock(&tbl->slot_tbl_lock); + status = htonl(NFS4ERR_DELAY); ++ /* Return NFS4ERR_BADSESSION if we're draining the session ++ * in order to reset it. ++ */ ++ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) ++ status = htonl(NFS4ERR_BADSESSION); + goto out; + } + diff --git a/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch b/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch new file mode 100644 index 0000000000..065f37e90f --- /dev/null +++ b/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch @@ -0,0 +1,59 @@ +From 6d0e194d2eefcaab6dbdca1f639748660144acb5 Mon Sep 17 00:00:00 2001 +From: Tejun Heo <tj@kernel.org> +Date: Thu, 4 Aug 2011 11:15:07 +0200 +Subject: pata_via: disable ATAPI DMA on AVERATEC 3200 + +From: Tejun Heo <tj@kernel.org> + +commit 6d0e194d2eefcaab6dbdca1f639748660144acb5 upstream. + +On AVERATEC 3200, pata_via causes memory corruption with ATAPI DMA, +which often leads to random kernel oops. The cause of the problem is +not well understood yet and only small subset of machines using the +controller seem affected. Blacklist ATAPI DMA on the machine. 
+ +Signed-off-by: Tejun Heo <tj@kernel.org> +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=11426 +Reported-and-tested-by: Jim Bray <jimsantelmo@gmail.com> +Cc: Alan Cox <alan@linux.intel.com> +Signed-off-by: Jeff Garzik <jgarzik@pobox.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/ata/pata_via.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/drivers/ata/pata_via.c ++++ b/drivers/ata/pata_via.c +@@ -124,6 +124,17 @@ static const struct via_isa_bridge { + { NULL } + }; + ++static const struct dmi_system_id no_atapi_dma_dmi_table[] = { ++ { ++ .ident = "AVERATEC 3200", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"), ++ DMI_MATCH(DMI_BOARD_NAME, "3200"), ++ }, ++ }, ++ { } ++}; ++ + struct via_port { + u8 cached_device; + }; +@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(str + mask &= ~ ATA_MASK_UDMA; + } + } ++ ++ if (dev->class == ATA_DEV_ATAPI && ++ dmi_check_system(no_atapi_dma_dmi_table)) { ++ ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); ++ mask &= ATA_MASK_PIO; ++ } ++ + return mask; + } + diff --git a/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch b/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch new file mode 100644 index 0000000000..a9db245bdf --- /dev/null +++ b/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch @@ -0,0 +1,69 @@ +From 20618b21da0796115e81906d24ff1601552701b7 Mon Sep 17 00:00:00 2001 +From: Boaz Harrosh <bharrosh@panasas.com> +Date: Wed, 3 Aug 2011 21:54:33 -0700 +Subject: pnfs-obj: Bug when we are running out of bio + +From: Boaz Harrosh <bharrosh@panasas.com> + +commit 20618b21da0796115e81906d24ff1601552701b7 upstream. + +When we have a situation that the number of pages we want +to encode is bigger then the size of the bio. 
(Which can +currently happen only when all IO is going to a single device +e.g. group_width==1) then the IO is submitted short and we +report back only the amount of bytes we actually wrote/read +and all is fine. BUT ... + +There was a bug that the current length counter was advanced +before the fail to add the extra page, and we come to a situation +that the CDB length was one-page longer than the actual bio size, +which is of course rejected by the osd-target. + +While here also fix the bio size calculation, in the case +that we received more than one group of devices. + +Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> +Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/nfs/objlayout/objio_osd.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/fs/nfs/objlayout/objio_osd.c ++++ b/fs/nfs/objlayout/objio_osd.c +@@ -587,22 +587,19 @@ static void _calc_stripe_info(struct obj + } + + static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, +- unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, ++ unsigned pgbase, struct _objio_per_comp *per_dev, int len, + gfp_t gfp_flags) + { + unsigned pg = *cur_pg; ++ int cur_len = len; + struct request_queue *q = + osd_request_queue(_io_od(ios, per_dev->dev)); + +- per_dev->length += cur_len; +- + if (per_dev->bio == NULL) { +- unsigned stripes = ios->layout->num_comps / +- ios->layout->mirrors_p1; +- unsigned pages_in_stripe = stripes * ++ unsigned pages_in_stripe = ios->layout->group_width * + (ios->layout->stripe_unit / PAGE_SIZE); + unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / +- stripes; ++ ios->layout->group_width; + + if (BIO_MAX_PAGES_KMALLOC < bio_size) + bio_size = BIO_MAX_PAGES_KMALLOC; +@@ -630,6 +627,7 @@ static int _add_stripe_unit(struct objio + } + BUG_ON(cur_len); + ++ per_dev->length += len; + *cur_pg = pg; + return 0; + } diff --git
a/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch b/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch new file mode 100644 index 0000000000..6d10741b5a --- /dev/null +++ b/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch @@ -0,0 +1,97 @@ +From 9af7db3228acc286c50e3a0f054ec982efdbc6c6 Mon Sep 17 00:00:00 2001 +From: Boaz Harrosh <bharrosh@panasas.com> +Date: Wed, 3 Aug 2011 21:52:51 -0700 +Subject: pnfs-obj: Fix the comp_index != 0 case + +From: Boaz Harrosh <bharrosh@panasas.com> + +commit 9af7db3228acc286c50e3a0f054ec982efdbc6c6 upstream. + +There were bugs in the case of partial layout where olo_comp_index +is not zero. This used to work and was tested but one of the later +cleanup SQUASHMEs broke it and was not tested since. + +Also add a dprint that specify those received layout parameters. +Everything else was already printed. + +Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> +Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/nfs/objlayout/objio_osd.c | 16 +++++++--------- + fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3 +++ + 2 files changed, 10 insertions(+), 9 deletions(-) + +--- a/fs/nfs/objlayout/objio_osd.c ++++ b/fs/nfs/objlayout/objio_osd.c +@@ -479,7 +479,6 @@ static int _io_check(struct objio_state + for (i = 0; i < ios->numdevs; i++) { + struct osd_sense_info osi; + struct osd_request *or = ios->per_dev[i].or; +- unsigned dev; + int ret; + + if (!or) +@@ -500,9 +499,8 @@ static int _io_check(struct objio_state + + continue; /* we recovered */ + } +- dev = ios->per_dev[i].dev; +- objlayout_io_set_result(&ios->ol_state, dev, +- &ios->layout->comps[dev].oc_object_id, ++ objlayout_io_set_result(&ios->ol_state, i, ++ &ios->layout->comps[i].oc_object_id, + osd_pri_2_pnfs_err(osi.osd_err_pri), + ios->per_dev[i].offset, + ios->per_dev[i].length, +@@ -650,7 +648,7 @@ static int _prepare_one_group(struct obj + int ret = 0; + + while (length) { +- struct _objio_per_comp *per_dev = 
&ios->per_dev[dev]; ++ struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; + unsigned cur_len, page_off = 0; + + if (!per_dev->length) { +@@ -670,8 +668,8 @@ static int _prepare_one_group(struct obj + cur_len = stripe_unit; + } + +- if (max_comp < dev) +- max_comp = dev; ++ if (max_comp < dev - first_dev) ++ max_comp = dev - first_dev; + } else { + cur_len = stripe_unit; + } +@@ -806,7 +804,7 @@ static int _read_mirrors(struct objio_st + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; + unsigned dev = per_dev->dev; + struct pnfs_osd_object_cred *cred = +- &ios->layout->comps[dev]; ++ &ios->layout->comps[cur_comp]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, +@@ -904,7 +902,7 @@ static int _write_mirrors(struct objio_s + for (; cur_comp < last_comp; ++cur_comp, ++dev) { + struct osd_request *or = NULL; + struct pnfs_osd_object_cred *cred = +- &ios->layout->comps[dev]; ++ &ios->layout->comps[cur_comp]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, +--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c ++++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struc + p = _osd_xdr_decode_data_map(p, &layout->olo_map); + layout->olo_comps_index = be32_to_cpup(p++); + layout->olo_num_comps = be32_to_cpup(p++); ++ dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, ++ layout->olo_comps_index, layout->olo_num_comps); ++ + iter->total_comps = layout->olo_num_comps; + return 0; + } diff --git a/queue-3.0/series b/queue-3.0/series index 7a03840847..07bb57d881 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -4,3 +4,9 @@ befs-validate-length-of-long-symbolic-links.patch i7core_edac-fixed-typo-in-error-count-calculation.patch possible-memory-corruption-on-mount.patch x86-intel-power-correct-the-msr_ia32_energy_perf_bias.patch 
+pata_via-disable-atapi-dma-on-averatec-3200.patch +pnfs-obj-fix-the-comp_index-0-case.patch +pnfs-obj-bug-when-we-are-running-out-of-bio.patch +nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch +nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch +x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch diff --git a/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch b/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch new file mode 100644 index 0000000000..02094b1768 --- /dev/null +++ b/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch @@ -0,0 +1,109 @@ +From 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 Mon Sep 17 00:00:00 2001 +From: Suresh Siddha <suresh.b.siddha@intel.com> +Date: Thu, 23 Jun 2011 11:19:26 -0700 +Subject: x86, mtrr: lock stop machine during MTRR rendezvous sequence + +From: Suresh Siddha <suresh.b.siddha@intel.com> + +commit 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 upstream. + +MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially +happen in parallel with another system wide rendezvous using +stop_machine(). This can lead to deadlock (The order in which +works are queued can be different on different cpu's. Some cpu's +will be running the first rendezvous handler and others will be running +the second rendezvous handler. Each set waiting for the other set to join +for the system wide rendezvous, leading to a deadlock). + +MTRR rendezvous sequence is not implemented using stop_machine() as this +gets called both from the process context as well as the cpu online paths +(where the cpu has not come online and the interrupts are disabled etc). +stop_machine() works with only online cpus. + +For now, take the stop_machine mutex in the MTRR rendezvous sequence that +gets called from an online cpu (here we are in the process context +and can potentially sleep while taking the mutex). 
And the MTRR rendezvous +that gets triggered during cpu online doesn't need to take this stop_machine +lock (as the stop_machine() already ensures that there is no cpu hotplug +going on in parallel by doing get_online_cpus()) + + TBD: Pursue a cleaner solution of extending the stop_machine() + infrastructure to handle the case where the calling cpu is + still not online and use this for MTRR rendezvous sequence. + +fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008 + +Reported-by: Vadim Kotelnikov <vadimuzzz@inbox.ru> +Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> +Link: http://lkml.kernel.org/r/20110623182056.807230326@sbsiddha-MOBL3.sc.intel.com +Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/kernel/cpu/mtrr/main.c | 23 +++++++++++++++++++++++ + include/linux/stop_machine.h | 2 ++ + kernel/stop_machine.c | 2 +- + 3 files changed, 26 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/mtrr/main.c ++++ b/arch/x86/kernel/cpu/mtrr/main.c +@@ -248,6 +248,25 @@ set_mtrr(unsigned int reg, unsigned long + unsigned long flags; + int cpu; + ++#ifdef CONFIG_SMP ++ /* ++ * If this cpu is not yet active, we are in the cpu online path. There ++ * can be no stop_machine() in parallel, as stop machine ensures this ++ * by using get_online_cpus(). We can skip taking the stop_cpus_mutex, ++ * as we don't need it and also we can't afford to block while waiting ++ * for the mutex. ++ * ++ * If this cpu is active, we need to prevent stop_machine() happening ++ * in parallel by taking the stop cpus mutex. ++ * ++ * Also, this is called in the context of cpu online path or in the ++ * context where cpu hotplug is prevented. So checking the active status ++ * of the raw_smp_processor_id() is safe. 
++ */ ++ if (cpu_active(raw_smp_processor_id())) ++ mutex_lock(&stop_cpus_mutex); ++#endif ++ + preempt_disable(); + + data.smp_reg = reg; +@@ -330,6 +349,10 @@ set_mtrr(unsigned int reg, unsigned long + + local_irq_restore(flags); + preempt_enable(); ++#ifdef CONFIG_SMP ++ if (cpu_active(raw_smp_processor_id())) ++ mutex_unlock(&stop_cpus_mutex); ++#endif + } + + /** +--- a/include/linux/stop_machine.h ++++ b/include/linux/stop_machine.h +@@ -27,6 +27,8 @@ struct cpu_stop_work { + struct cpu_stop_done *done; + }; + ++extern struct mutex stop_cpus_mutex; ++ + int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); + void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, + struct cpu_stop_work *work_buf); +--- a/kernel/stop_machine.c ++++ b/kernel/stop_machine.c +@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cp + cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf); + } + ++DEFINE_MUTEX(stop_cpus_mutex); + /* static data for stop_cpus */ +-static DEFINE_MUTEX(stop_cpus_mutex); + static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); + + int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) |