summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@suse.de>2011-10-03 14:36:48 -0700
committerGreg Kroah-Hartman <gregkh@suse.de>2011-10-03 14:36:48 -0700
commit410449452dd3868bff8971f247c7f42652d7bab3 (patch)
treec85585ed990683c9ef2ce96a98e1a7bcb1fd86e7
parent380dd8e0ffba79547f9346ebb6a943eae837c2b5 (diff)
downloadstable-queue-410449452dd3868bff8971f247c7f42652d7bab3.tar.gz
3.0 patches
-rw-r--r--queue-3.0/ide-disk-fix-request-requeuing.patch47
-rw-r--r--queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch41
-rw-r--r--queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch31
-rw-r--r--queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch117
-rw-r--r--queue-3.0/pci-series4
-rw-r--r--queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch445
-rw-r--r--queue-3.0/sched-fix-up-wchan-borkage.patch35
-rw-r--r--queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch57
-rw-r--r--queue-3.0/series9
-rw-r--r--queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch58
10 files changed, 844 insertions, 0 deletions
diff --git a/queue-3.0/ide-disk-fix-request-requeuing.patch b/queue-3.0/ide-disk-fix-request-requeuing.patch
new file mode 100644
index 0000000000..13b2644cc4
--- /dev/null
+++ b/queue-3.0/ide-disk-fix-request-requeuing.patch
@@ -0,0 +1,47 @@
+From 2c8fc867602e385fd2abe76da0b6bda8ed907547 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@alien8.de>
+Date: Mon, 3 Oct 2011 14:28:18 -0400
+Subject: ide-disk: Fix request requeuing
+
+From: Borislav Petkov <bp@alien8.de>
+
+commit 2c8fc867602e385fd2abe76da0b6bda8ed907547 upstream.
+
+Simon Kirby reported that on his RAID setup with idedisk underneath
+the box OOMs after a couple of days of runtime. Running with
+CONFIG_DEBUG_KMEMLEAK pointed to idedisk_prep_fn() which unconditionally
+allocates an ide_cmd struct. However, ide_requeue_and_plug() can be
+called more than once per request, either from the request issue or the
+IRQ handler path, and so blk_peek_request() ends up in idedisk_prep_fn()
+repeatedly, allocating a struct ide_cmd every time and "forgetting" the
+previous pointer.
+
+Make sure the code reuses the old allocated chunk.
+
+Reported-and-tested-by: Simon Kirby <sim@hostway.ca>
+Link: http://marc.info/?l=linux-kernel&m=131667641517919
+Link: http://lkml.kernel.org/r/20110922072643.GA27232@hostway.ca
+Signed-off-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ide/ide-disk.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/ide/ide-disk.c
++++ b/drivers/ide/ide-disk.c
+@@ -435,7 +435,12 @@ static int idedisk_prep_fn(struct reques
+ if (!(rq->cmd_flags & REQ_FLUSH))
+ return BLKPREP_OK;
+
+- cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
++ if (rq->special) {
++ cmd = rq->special;
++ memset(cmd, 0, sizeof(*cmd));
++ } else {
++ cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
++ }
+
+ /* FIXME: map struct ide_taskfile on rq->cmd[] */
+ BUG_ON(cmd == NULL);
diff --git a/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch b/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch
new file mode 100644
index 0000000000..9fcb7fdc48
--- /dev/null
+++ b/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch
@@ -0,0 +1,41 @@
+From 1a4b1a41b8a3d5256019854e851beed063b34344 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Tue, 13 Sep 2011 15:16:33 -0300
+Subject: pci: Don't crash when reading mpss from root complex
+
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+
+commit 1a4b1a41b8a3d5256019854e851beed063b34344 upstream.
+
+In pcie_find_smpss(), we have the following statement:
+
+ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
+ dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
+
+The problem is that at least on my machine, this gets called for the
+root complex (virtual P2P bridge), and dev->bus->self is NULL since
+the parent bus for this is not itself anchored to a PCI device.
+
+This adds the necessary NULL check.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Acked-by: Jon Mason <mason@myri.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/probe.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -1352,7 +1352,8 @@ static int pcie_find_smpss(struct pci_de
+ * will occur as normal.
+ */
+ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
+- dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
++ (dev->bus->self &&
++ dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)))
+ *smpss = 0;
+
+ if (*smpss > dev->pcie_mpss)
diff --git a/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch b/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch
new file mode 100644
index 0000000000..1812baf6d8
--- /dev/null
+++ b/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch
@@ -0,0 +1,31 @@
+From debc3b778508f59696ff188f0feca271dcbfa7d9 Mon Sep 17 00:00:00 2001
+From: Jon Mason <mason@myri.com>
+Date: Tue, 2 Aug 2011 00:01:18 -0500
+Subject: PCI: export pcie_bus_configure_settings symbol
+
+From: Jon Mason <mason@myri.com>
+
+commit debc3b778508f59696ff188f0feca271dcbfa7d9 upstream.
+
+pcie_bus_configure_settings needs to be exported if the PCI hotplug
+driver is being compiled as a module.
+
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: Jon Mason <mason@myri.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/probe.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -1471,6 +1471,7 @@ void pcie_bus_configure_settings(struct
+ pcie_bus_configure_set(bus->self, &smpss);
+ pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
+ }
++EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
+
+ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
+ {
diff --git a/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch b/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch
new file mode 100644
index 0000000000..7c0f5cd028
--- /dev/null
+++ b/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch
@@ -0,0 +1,117 @@
+From ed2888e906b56769b4ffabb9c577190438aa68b8 Mon Sep 17 00:00:00 2001
+From: Jon Mason <mason@myri.com>
+Date: Thu, 8 Sep 2011 16:41:18 -0500
+Subject: PCI: Remove MRRS modification from MPS setting code
+
+From: Jon Mason <mason@myri.com>
+
+commit ed2888e906b56769b4ffabb9c577190438aa68b8 upstream.
+
+Modifying the Maximum Read Request Size to 0 (value of 128Bytes) has
+massive negative ramifications on some devices. Without knowing which
+devices have this issue, do not modify from the default value when
+walking the PCI-E bus in pcie_bus_safe mode. Also, make pcie_bus_safe
+the default procedure.
+
+Tested-by: Sven Schnelle <svens@stackframe.org>
+Tested-by: Simon Kirby <sim@hostway.ca>
+Tested-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
+Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Reported-and-tested-by: Niels Ole Salscheider <niels_ole@salscheider-online.de>
+References: https://bugzilla.kernel.org/show_bug.cgi?id=42162
+Signed-off-by: Jon Mason <mason@myri.com>
+Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/pci.c | 2 +-
+ drivers/pci/probe.c | 45 ++++++++++++++++++++++++---------------------
+ 2 files changed, 25 insertions(+), 22 deletions(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEF
+ unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE;
+ unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
+
+-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
++enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
+
+ /*
+ * The default CLS is used if arch didn't set CLS explicitly and not
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -1397,34 +1397,37 @@ static void pcie_write_mps(struct pci_de
+
+ static void pcie_write_mrrs(struct pci_dev *dev, int mps)
+ {
+- int rc, mrrs;
+-
+- if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
+- int dev_mpss = 128 << dev->pcie_mpss;
+-
+- /* For Max performance, the MRRS must be set to the largest
+- * supported value. However, it cannot be configured larger
+- * than the MPS the device or the bus can support. This assumes
+- * that the largest MRRS available on the device cannot be
+- * smaller than the device MPSS.
+- */
+- mrrs = mps < dev_mpss ? mps : dev_mpss;
+- } else
+- /* In the "safe" case, configure the MRRS for fairness on the
+- * bus by making all devices have the same size
+- */
+- mrrs = mps;
++ int rc, mrrs, dev_mpss;
+
++ /* In the "safe" case, do not configure the MRRS. There appear to be
++ * issues with setting MRRS to 0 on a number of devices.
++ */
++
++ if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
++ return;
++
++ dev_mpss = 128 << dev->pcie_mpss;
++
++ /* For Max performance, the MRRS must be set to the largest supported
++ * value. However, it cannot be configured larger than the MPS the
++ * device or the bus can support. This assumes that the largest MRRS
++ * available on the device cannot be smaller than the device MPSS.
++ */
++ mrrs = min(mps, dev_mpss);
+
+ /* MRRS is a R/W register. Invalid values can be written, but a
+- * subsiquent read will verify if the value is acceptable or not.
++ * subsequent read will verify if the value is acceptable or not.
+ * If the MRRS value provided is not acceptable (e.g., too large),
+ * shrink the value until it is acceptable to the HW.
+ */
+ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
++ dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value"
++ " to %d. If any issues are encountered, please try "
++ "running with pci=pcie_bus_safe\n", mrrs);
+ rc = pcie_set_readrq(dev, mrrs);
+ if (rc)
+- dev_err(&dev->dev, "Failed attempting to set the MRRS\n");
++ dev_err(&dev->dev,
++ "Failed attempting to set the MRRS\n");
+
+ mrrs /= 2;
+ }
+@@ -1437,13 +1440,13 @@ static int pcie_bus_configure_set(struct
+ if (!pci_is_pcie(dev))
+ return 0;
+
+- dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
++ dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
+
+ pcie_write_mps(dev, mps);
+ pcie_write_mrrs(dev, mps);
+
+- dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
++ dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
+
+ return 0;
diff --git a/queue-3.0/pci-series b/queue-3.0/pci-series
new file mode 100644
index 0000000000..2d8d8ad280
--- /dev/null
+++ b/queue-3.0/pci-series
@@ -0,0 +1,4 @@
+pci-set-pci-e-max-payload-size-on-fabric.patch
+pci-export-pcie_bus_configure_settings-symbol.patch
+pci-remove-mrrs-modification-from-mps-setting-code.patch
+pci-don-t-crash-when-reading-mpss-from-root-complex.patch
diff --git a/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch b/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch
new file mode 100644
index 0000000000..6ca45f3804
--- /dev/null
+++ b/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch
@@ -0,0 +1,445 @@
+From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001
+From: Jon Mason <mason@myri.com>
+Date: Wed, 20 Jul 2011 15:20:54 -0500
+Subject: PCI: Set PCI-E Max Payload Size on fabric
+
+From: Jon Mason <mason@myri.com>
+
+commit b03e7495a862b028294f59fc87286d6d78ee7fa1 upstream.
+
+On a given PCI-E fabric, each device, bridge, and root port can have a
+different PCI-E maximum payload size. There is a sizable performance
+boost for having the largest possible maximum payload size on each PCI-E
+device. However, if improperly configured, fatal bus errors can occur.
+Thus, it is important to ensure that PCI-E payloads sent by a device
+are never larger than the MPS setting of all devices on the way to the
+destination.
+
+This can be achieved two ways:
+
+- A conservative approach is to use the smallest common denominator of
+ the entire tree below a root complex for every device on that fabric.
+
+This means for example that having a 128 bytes MPS USB controller on one
+leg of a switch will dramatically reduce performance of a video card or
+10GE adapter on another leg of that same switch.
+
+It also means that any hierarchy supporting hotplug slots (including
+expresscard or thunderbolt I suppose, dbl check that) will have to be
+entirely clamped to 128 bytes since we cannot predict what will be
+plugged into those slots, and we cannot change the MPS on a "live"
+system.
+
+- A more optimal way is possible, if it falls within a couple of
+ constraints:
+* The top-level host bridge will never generate packets larger than the
+ smallest TLP (or if it can be controlled independently from its MPS at
+ least)
+* The device will never generate packets larger than MPS (which can be
+ configured via MRRS)
+* No support of direct PCI-E <-> PCI-E transfers between devices without
+ some additional code to specifically deal with that case
+
+Then we can use an approach that basically ignores downstream requests
+and focuses exclusively on upstream requests. In that case, all we need
+to care about is that a device MPS is no larger than its parent MPS,
+which allows us to keep all switches/bridges to the max MPS supported by
+their parent and eventually the PHB.
+
+In this case, your USB controller would no longer "starve" your 10GE
+Ethernet and your hotplug slots won't affect your global MPS.
+Additionally, the hotplugged devices themselves can be configured to a
+larger MPS up to the value configured in the hotplug bridge.
+
+To choose between the two available options, two PCI kernel boot args
+have been added to the PCI calls. "pcie_bus_safe" will provide the
+former behavior, while "pcie_bus_perf" will perform the latter behavior.
+By default, the latter behavior is used.
+
+NOTE: due to the location of the enablement, each arch will need to add
+calls to this function. This patch only enables x86.
+
+This patch includes a number of changes recommended by Benjamin
+Herrenschmidt.
+
+Tested-by: Jordan_Hargrave@dell.com
+Signed-off-by: Jon Mason <mason@myri.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/pci/acpi.c | 9 ++
+ drivers/pci/hotplug/pcihp_slot.c | 45 ------------
+ drivers/pci/pci.c | 67 ++++++++++++++++++
+ drivers/pci/probe.c | 145 +++++++++++++++++++++++++++++++++++++++
+ include/linux/pci.h | 15 +++-
+ 5 files changed, 236 insertions(+), 45 deletions(-)
+
+--- a/arch/x86/pci/acpi.c
++++ b/arch/x86/pci/acpi.c
+@@ -361,6 +361,15 @@ struct pci_bus * __devinit pci_acpi_scan
+ }
+ }
+
++ /* After the PCI-E bus has been walked and all devices discovered,
++ * configure any settings of the fabric that might be necessary.
++ */
++ if (bus) {
++ struct pci_bus *child;
++ list_for_each_entry(child, &bus->children, node)
++ pcie_bus_configure_settings(child, child->self->pcie_mpss);
++ }
++
+ if (!bus)
+ kfree(sd);
+
+--- a/drivers/pci/hotplug/pcihp_slot.c
++++ b/drivers/pci/hotplug/pcihp_slot.c
+@@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci
+ */
+ }
+
+-/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */
+-static int pci_set_payload(struct pci_dev *dev)
+-{
+- int pos, ppos;
+- u16 pctl, psz;
+- u16 dctl, dsz, dcap, dmax;
+- struct pci_dev *parent;
+-
+- parent = dev->bus->self;
+- pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+- if (!pos)
+- return 0;
+-
+- /* Read Device MaxPayload capability and setting */
+- pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl);
+- pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap);
+- dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+- dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD);
+-
+- /* Read Parent MaxPayload setting */
+- ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+- if (!ppos)
+- return 0;
+- pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
+- psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+-
+- /* If parent payload > device max payload -> error
+- * If parent payload > device payload -> set speed
+- * If parent payload <= device payload -> do nothing
+- */
+- if (psz > dmax)
+- return -1;
+- else if (psz > dsz) {
+- dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz);
+- pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
+- (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) +
+- (psz << 5));
+- }
+- return 0;
+-}
+-
+ void pci_configure_slot(struct pci_dev *dev)
+ {
+ struct pci_dev *cdev;
+@@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
+ return;
+
+- ret = pci_set_payload(dev);
+- if (ret)
+- dev_warn(&dev->dev, "could not set device max payload\n");
++ pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss);
+
+ memset(&hpp, 0, sizeof(hpp));
+ ret = pci_get_hp_params(dev, &hpp);
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEF
+ unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE;
+ unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
+
++enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
++
+ /*
+ * The default CLS is used if arch didn't set CLS explicitly and not
+ * all pci devices agree on the same value. Arch can override either
+@@ -3223,6 +3225,67 @@ out:
+ EXPORT_SYMBOL(pcie_set_readrq);
+
+ /**
++ * pcie_get_mps - get PCI Express maximum payload size
++ * @dev: PCI device to query
++ *
++ * Returns maximum payload size in bytes
++ * or appropriate error value.
++ */
++int pcie_get_mps(struct pci_dev *dev)
++{
++ int ret, cap;
++ u16 ctl;
++
++ cap = pci_pcie_cap(dev);
++ if (!cap)
++ return -EINVAL;
++
++ ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
++ if (!ret)
++ ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
++
++ return ret;
++}
++
++/**
++ * pcie_set_mps - set PCI Express maximum payload size
++ * @dev: PCI device to query
++ * @rq: maximum payload size in bytes
++ * valid values are 128, 256, 512, 1024, 2048, 4096
++ *
++ * If possible sets maximum payload size
++ */
++int pcie_set_mps(struct pci_dev *dev, int mps)
++{
++ int cap, err = -EINVAL;
++ u16 ctl, v;
++
++ if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
++ goto out;
++
++ v = ffs(mps) - 8;
++ if (v > dev->pcie_mpss)
++ goto out;
++ v <<= 5;
++
++ cap = pci_pcie_cap(dev);
++ if (!cap)
++ goto out;
++
++ err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
++ if (err)
++ goto out;
++
++ if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) {
++ ctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
++ ctl |= v;
++ err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl);
++ }
++out:
++ return err;
++}
++
++/**
+ * pci_select_bars - Make BAR mask from the type of resource
+ * @dev: the PCI device for which BAR mask is made
+ * @flags: resource type mask to be selected
+@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str)
+ pci_hotplug_io_size = memparse(str + 9, &str);
+ } else if (!strncmp(str, "hpmemsize=", 10)) {
+ pci_hotplug_mem_size = memparse(str + 10, &str);
++ } else if (!strncmp(str, "pcie_bus_safe", 13)) {
++ pcie_bus_config = PCIE_BUS_SAFE;
++ } else if (!strncmp(str, "pcie_bus_perf", 13)) {
++ pcie_bus_config = PCIE_BUS_PERFORMANCE;
+ } else {
+ printk(KERN_ERR "PCI: Unknown option `%s'\n",
+ str);
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -860,6 +860,8 @@ void set_pcie_port_type(struct pci_dev *
+ pdev->pcie_cap = pos;
+ pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+ pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
++ pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
++ pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
+ }
+
+ void set_pcie_hotplug_bridge(struct pci_dev *pdev)
+@@ -1327,6 +1329,149 @@ int pci_scan_slot(struct pci_bus *bus, i
+ return nr;
+ }
+
++static int pcie_find_smpss(struct pci_dev *dev, void *data)
++{
++ u8 *smpss = data;
++
++ if (!pci_is_pcie(dev))
++ return 0;
++
++ /* For PCIE hotplug enabled slots not connected directly to a
++ * PCI-E root port, there can be problems when hotplugging
++ * devices. This is due to the possibility of hotplugging a
++ * device into the fabric with a smaller MPS that the devices
++ * currently running have configured. Modifying the MPS on the
++ * running devices could cause a fatal bus error due to an
++ * incoming frame being larger than the newly configured MPS.
++ * To work around this, the MPS for the entire fabric must be
++ * set to the minimum size. Any devices hotplugged into this
++ * fabric will have the minimum MPS set. If the PCI hotplug
++ * slot is directly connected to the root port and there are not
++ * other devices on the fabric (which seems to be the most
++ * common case), then this is not an issue and MPS discovery
++ * will occur as normal.
++ */
++ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
++ dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
++ *smpss = 0;
++
++ if (*smpss > dev->pcie_mpss)
++ *smpss = dev->pcie_mpss;
++
++ return 0;
++}
++
++static void pcie_write_mps(struct pci_dev *dev, int mps)
++{
++ int rc, dev_mpss;
++
++ dev_mpss = 128 << dev->pcie_mpss;
++
++ if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
++ if (dev->bus->self) {
++ dev_dbg(&dev->bus->dev, "Bus MPSS %d\n",
++ 128 << dev->bus->self->pcie_mpss);
++
++ /* For "MPS Force Max", the assumption is made that
++ * downstream communication will never be larger than
++ * the MRRS. So, the MPS only needs to be configured
++ * for the upstream communication. This being the case,
++ * walk from the top down and set the MPS of the child
++ * to that of the parent bus.
++ */
++ mps = 128 << dev->bus->self->pcie_mpss;
++ if (mps > dev_mpss)
++ dev_warn(&dev->dev, "MPS configured higher than"
++ " maximum supported by the device. If"
++ " a bus issue occurs, try running with"
++ " pci=pcie_bus_safe.\n");
++ }
++
++ dev->pcie_mpss = ffs(mps) - 8;
++ }
++
++ rc = pcie_set_mps(dev, mps);
++ if (rc)
++ dev_err(&dev->dev, "Failed attempting to set the MPS\n");
++}
++
++static void pcie_write_mrrs(struct pci_dev *dev, int mps)
++{
++ int rc, mrrs;
++
++ if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
++ int dev_mpss = 128 << dev->pcie_mpss;
++
++ /* For Max performance, the MRRS must be set to the largest
++ * supported value. However, it cannot be configured larger
++ * than the MPS the device or the bus can support. This assumes
++ * that the largest MRRS available on the device cannot be
++ * smaller than the device MPSS.
++ */
++ mrrs = mps < dev_mpss ? mps : dev_mpss;
++ } else
++ /* In the "safe" case, configure the MRRS for fairness on the
++ * bus by making all devices have the same size
++ */
++ mrrs = mps;
++
++
++ /* MRRS is a R/W register. Invalid values can be written, but a
++ * subsiquent read will verify if the value is acceptable or not.
++ * If the MRRS value provided is not acceptable (e.g., too large),
++ * shrink the value until it is acceptable to the HW.
++ */
++ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
++ rc = pcie_set_readrq(dev, mrrs);
++ if (rc)
++ dev_err(&dev->dev, "Failed attempting to set the MRRS\n");
++
++ mrrs /= 2;
++ }
++}
++
++static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
++{
++ int mps = 128 << *(u8 *)data;
++
++ if (!pci_is_pcie(dev))
++ return 0;
++
++ dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
++ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
++
++ pcie_write_mps(dev, mps);
++ pcie_write_mrrs(dev, mps);
++
++ dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
++ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
++
++ return 0;
++}
++
++/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down,
++ * parents then children fashion. If this changes, then this code will not
++ * work as designed.
++ */
++void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
++{
++ u8 smpss = mpss;
++
++ if (!bus->self)
++ return;
++
++ if (!pci_is_pcie(bus->self))
++ return;
++
++ if (pcie_bus_config == PCIE_BUS_SAFE) {
++ pcie_find_smpss(bus->self, &smpss);
++ pci_walk_bus(bus, pcie_find_smpss, &smpss);
++ }
++
++ pcie_bus_configure_set(bus->self, &smpss);
++ pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
++}
++
+ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
+ {
+ unsigned int devfn, pass, max = bus->secondary;
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -251,7 +251,8 @@ struct pci_dev {
+ u8 revision; /* PCI revision, low byte of class word */
+ u8 hdr_type; /* PCI header type (`multi' flag masked out) */
+ u8 pcie_cap; /* PCI-E capability offset */
+- u8 pcie_type; /* PCI-E device/port type */
++ u8 pcie_type:4; /* PCI-E device/port type */
++ u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */
+ u8 rom_base_reg; /* which config register controls the ROM */
+ u8 pin; /* which interrupt pin this device uses */
+
+@@ -617,6 +618,16 @@ struct pci_driver {
+ /* these external functions are only available when PCI support is enabled */
+ #ifdef CONFIG_PCI
+
++extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss);
++
++enum pcie_bus_config_types {
++ PCIE_BUS_PERFORMANCE,
++ PCIE_BUS_SAFE,
++ PCIE_BUS_PEER2PEER,
++};
++
++extern enum pcie_bus_config_types pcie_bus_config;
++
+ extern struct bus_type pci_bus_type;
+
+ /* Do NOT directly access these two variables, unless you are arch specific pci
+@@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev);
+ int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
+ int pcie_get_readrq(struct pci_dev *dev);
+ int pcie_set_readrq(struct pci_dev *dev, int rq);
++int pcie_get_mps(struct pci_dev *dev);
++int pcie_set_mps(struct pci_dev *dev, int mps);
+ int __pci_reset_function(struct pci_dev *dev);
+ int pci_reset_function(struct pci_dev *dev);
+ void pci_update_resource(struct pci_dev *dev, int resno);
diff --git a/queue-3.0/sched-fix-up-wchan-borkage.patch b/queue-3.0/sched-fix-up-wchan-borkage.patch
new file mode 100644
index 0000000000..bc7eff7e6c
--- /dev/null
+++ b/queue-3.0/sched-fix-up-wchan-borkage.patch
@@ -0,0 +1,35 @@
+From 6ebbe7a07b3bc40b168d2afc569a6543c020d2e3 Mon Sep 17 00:00:00 2001
+From: Simon Kirby <sim@hostway.ca>
+Date: Thu, 22 Sep 2011 17:03:46 -0700
+Subject: sched: Fix up wchan borkage
+
+From: Simon Kirby <sim@hostway.ca>
+
+commit 6ebbe7a07b3bc40b168d2afc569a6543c020d2e3 upstream.
+
+Commit c259e01a1ec ("sched: Separate the scheduler entry for
+preemption") contained a boo-boo wrecking wchan output. It forgot to
+put the new schedule() function in the __sched section and thereby
+doesn't get properly ignored for things like wchan.
+
+Tested-by: Simon Kirby <sim@hostway.ca>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/20110923000346.GA25425@hostway.ca
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4335,7 +4335,7 @@ static inline void sched_submit_work(str
+ blk_schedule_flush_plug(tsk);
+ }
+
+-asmlinkage void schedule(void)
++asmlinkage void __sched schedule(void)
+ {
+ struct task_struct *tsk = current;
+
diff --git a/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch b/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch
new file mode 100644
index 0000000000..4f3aba4eaa
--- /dev/null
+++ b/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch
@@ -0,0 +1,57 @@
+From 3be209a8e22cedafc1b6945608b7bb8d9887ab61 Mon Sep 17 00:00:00 2001
+From: Shawn Bohrer <sbohrer@rgmadvisors.com>
+Date: Mon, 12 Sep 2011 09:28:04 -0500
+Subject: sched/rt: Migrate equal priority tasks to available CPUs
+
+From: Shawn Bohrer <sbohrer@rgmadvisors.com>
+
+commit 3be209a8e22cedafc1b6945608b7bb8d9887ab61 upstream.
+
+Commit 43fa5460fe60dea5c610490a1d263415419c60f6 ("sched: Try not to
+migrate higher priority RT tasks") also introduced a change in behavior
+which keeps RT tasks on the same CPU if there is an equal priority RT
+task currently running even if there are empty CPUs available.
+
+This can cause unnecessary wakeup latencies, and can prevent the
+scheduler from balancing all RT tasks across available CPUs.
+
+This change causes an RT task to search for a new CPU if an equal
+priority RT task is already running on wakeup. Lower priority tasks
+will still have to wait on higher priority tasks, but the system should
+still balance out because there is always the possibility that if there
+are both a high and low priority RT tasks on a given CPU that the high
+priority task could wakeup while the low priority task is running and
+force it to search for a better runqueue.
+
+Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Tested-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/1315837684-18733-1-git-send-email-sbohrer@rgmadvisors.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched_rt.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -1038,7 +1038,7 @@ select_task_rq_rt(struct task_struct *p,
+ */
+ if (curr && unlikely(rt_task(curr)) &&
+ (curr->rt.nr_cpus_allowed < 2 ||
+- curr->prio < p->prio) &&
++ curr->prio <= p->prio) &&
+ (p->rt.nr_cpus_allowed > 1)) {
+ int target = find_lowest_rq(p);
+
+@@ -1569,7 +1569,7 @@ static void task_woken_rt(struct rq *rq,
+ p->rt.nr_cpus_allowed > 1 &&
+ rt_task(rq->curr) &&
+ (rq->curr->rt.nr_cpus_allowed < 2 ||
+- rq->curr->prio < p->prio))
++ rq->curr->prio <= p->prio))
+ push_rt_tasks(rq);
+ }
+
diff --git a/queue-3.0/series b/queue-3.0/series
new file mode 100644
index 0000000000..bd2b856cef
--- /dev/null
+++ b/queue-3.0/series
@@ -0,0 +1,9 @@
+pci-set-pci-e-max-payload-size-on-fabric.patch
+pci-export-pcie_bus_configure_settings-symbol.patch
+pci-remove-mrrs-modification-from-mps-setting-code.patch
+pci-don-t-crash-when-reading-mpss-from-root-complex.patch
+sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch
+
+sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch
+sched-fix-up-wchan-borkage.patch
+ide-disk-fix-request-requeuing.patch
diff --git a/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch b/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch
new file mode 100644
index 0000000000..19527e09e9
--- /dev/null
+++ b/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch
@@ -0,0 +1,58 @@
+From f4142cba4e4065a416e78ade905bea29ff3930e6 Mon Sep 17 00:00:00 2001
+From: "David S. Miller" <davem@davemloft.net>
+Date: Thu, 29 Sep 2011 12:18:59 -0700
+Subject: sparc64: Force the execute bit in OpenFirmware's translation entries.
+Status: RO
+Content-Length: 1822
+Lines: 48
+
+From: "David S. Miller" <davem@davemloft.net>
+
+In the OF 'translations' property, the template TTEs in the mappings
+never specify the executable bit. This is the case even though some
+of these mappings are for OF's code segment.
+
+Therefore, we need to force the execute bit on in every mapping.
+
+This problem can only really trigger on Niagara/sun4v machines and the
+history behind this is a little complicated.
+
+Previous to sun4v, the sun4u TTE entries lacked a hardware execute
+permission bit. So OF didn't have to ever worry about setting
+anything to handle executable pages. Any valid TTE loaded into the
+I-TLB would be respected by the chip.
+
+But sun4v Niagara chips have a real hardware enforced executable bit
+in their TTEs. So it has to be set or else the I-TLB throws an
+instruction access exception with type code 6 (protection violation).
+
+We've been extremely fortunate to not get bitten by this in the past.
+
+The best I can tell is that the OF's mappings for its executable code
+were mapped using permanent locked mappings on sun4v in the past.
+Therefore, the fact that we didn't have the exec bit set in the OF
+translations we would use did not matter in practice.
+
+Thanks to Greg Onufer for helping me track this down.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/sparc/mm/init_64.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -511,6 +511,11 @@ static void __init read_obp_translations
+ for (i = 0; i < prom_trans_ents; i++)
+ prom_trans[i].data &= ~0x0003fe0000000000UL;
+ }
++
++ /* Force execute bit on. */
++ for (i = 0; i < prom_trans_ents; i++)
++ prom_trans[i].data |= (tlb_type == hypervisor ?
++ _PAGE_EXEC_4V : _PAGE_EXEC_4U);
+ }
+
+ static void __init hypervisor_tlb_lock(unsigned long vaddr,