diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-10-03 14:36:48 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-10-03 14:36:48 -0700 |
commit | 410449452dd3868bff8971f247c7f42652d7bab3 (patch) | |
tree | c85585ed990683c9ef2ce96a98e1a7bcb1fd86e7 | |
parent | 380dd8e0ffba79547f9346ebb6a943eae837c2b5 (diff) | |
download | stable-queue-410449452dd3868bff8971f247c7f42652d7bab3.tar.gz |
3.0 patches
10 files changed, 844 insertions, 0 deletions
diff --git a/queue-3.0/ide-disk-fix-request-requeuing.patch b/queue-3.0/ide-disk-fix-request-requeuing.patch new file mode 100644 index 0000000000..13b2644cc4 --- /dev/null +++ b/queue-3.0/ide-disk-fix-request-requeuing.patch @@ -0,0 +1,47 @@ +From 2c8fc867602e385fd2abe76da0b6bda8ed907547 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@alien8.de> +Date: Mon, 3 Oct 2011 14:28:18 -0400 +Subject: ide-disk: Fix request requeuing + +From: Borislav Petkov <bp@alien8.de> + +commit 2c8fc867602e385fd2abe76da0b6bda8ed907547 upstream. + +Simon Kirby reported that on his RAID setup with idedisk underneath +the box OOMs after a couple of days of runtime. Running with +CONFIG_DEBUG_KMEMLEAK pointed to idedisk_prep_fn() which unconditionally +allocates an ide_cmd struct. However, ide_requeue_and_plug() can be +called more than once per request, either from the request issue or the +IRQ handler path and so blk_peek_request() ends up in idedisk_prep_fn() +repeatedly, allocating a struct ide_cmd every time and "forgetting" the +previous pointer. + +Make sure the code reuses the old allocated chunk. + +Reported-and-tested-by: Simon Kirby <sim@hostway.ca> +Link: http://marc.info/?l=linux-kernel&m=131667641517919 +Link: http://lkml.kernel.org/r/20110922072643.GA27232@hostway.ca +Signed-off-by: Borislav Petkov <bp@alien8.de> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/ide/ide-disk.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/ide/ide-disk.c ++++ b/drivers/ide/ide-disk.c +@@ -435,7 +435,12 @@ static int idedisk_prep_fn(struct reques + if (!(rq->cmd_flags & REQ_FLUSH)) + return BLKPREP_OK; + +- cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); ++ if (rq->special) { ++ cmd = rq->special; ++ memset(cmd, 0, sizeof(*cmd)); ++ } else { ++ cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); ++ } + + /* FIXME: map struct ide_taskfile on rq->cmd[] */ + BUG_ON(cmd == NULL); diff --git a/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch b/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch new file mode 100644 index 0000000000..9fcb7fdc48 --- /dev/null +++ b/queue-3.0/pci-don-t-crash-when-reading-mpss-from-root-complex.patch @@ -0,0 +1,41 @@ +From 1a4b1a41b8a3d5256019854e851beed063b34344 Mon Sep 17 00:00:00 2001 +From: Benjamin Herrenschmidt <benh@kernel.crashing.org> +Date: Tue, 13 Sep 2011 15:16:33 -0300 +Subject: pci: Don't crash when reading mpss from root complex + +From: Benjamin Herrenschmidt <benh@kernel.crashing.org> + +commit 1a4b1a41b8a3d5256019854e851beed063b34344 upstream. + +In pcie_find_smpss(), we have the following statement: + + if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || + dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + +The problem is that at least on my machine, this gets called for the +root complex (virtual P2P bridge), and dev->bus->self is NULL since +the parent bus for this is not itself anchored to a PCI device. + +This adds the necessary NULL check. 
+ +Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> +Acked-by: Jon Mason <mason@myri.com> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/pci/probe.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -1352,7 +1352,8 @@ static int pcie_find_smpss(struct pci_de + * will occur as normal. + */ + if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || +- dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) ++ (dev->bus->self && ++ dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT))) + *smpss = 0; + + if (*smpss > dev->pcie_mpss) diff --git a/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch b/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch new file mode 100644 index 0000000000..1812baf6d8 --- /dev/null +++ b/queue-3.0/pci-export-pcie_bus_configure_settings-symbol.patch @@ -0,0 +1,31 @@ +From debc3b778508f59696ff188f0feca271dcbfa7d9 Mon Sep 17 00:00:00 2001 +From: Jon Mason <mason@myri.com> +Date: Tue, 2 Aug 2011 00:01:18 -0500 +Subject: PCI: export pcie_bus_configure_settings symbol + +From: Jon Mason <mason@myri.com> + +commit debc3b778508f59696ff188f0feca271dcbfa7d9 upstream. + +pcie_bus_configure_settings needs to be exported if the PCI hotplug +driver is being compiled as a module. 
+ +Reported-by: Stephen Rothwell <sfr@canb.auug.org.au> +Signed-off-by: Jon Mason <mason@myri.com> +Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/pci/probe.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -1471,6 +1471,7 @@ void pcie_bus_configure_settings(struct + pcie_bus_configure_set(bus->self, &smpss); + pci_walk_bus(bus, pcie_bus_configure_set, &smpss); + } ++EXPORT_SYMBOL_GPL(pcie_bus_configure_settings); + + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) + { diff --git a/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch b/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch new file mode 100644 index 0000000000..7c0f5cd028 --- /dev/null +++ b/queue-3.0/pci-remove-mrrs-modification-from-mps-setting-code.patch @@ -0,0 +1,117 @@ +From ed2888e906b56769b4ffabb9c577190438aa68b8 Mon Sep 17 00:00:00 2001 +From: Jon Mason <mason@myri.com> +Date: Thu, 8 Sep 2011 16:41:18 -0500 +Subject: PCI: Remove MRRS modification from MPS setting code + +From: Jon Mason <mason@myri.com> + +commit ed2888e906b56769b4ffabb9c577190438aa68b8 upstream. + +Modifying the Maximum Read Request Size to 0 (value of 128Bytes) has +massive negative ramifications on some devices. Without knowing which +devices have this issue, do not modify from the default value when +walking the PCI-E bus in pcie_bus_safe mode. Also, make pcie_bus_safe +the default procedure. + +Tested-by: Sven Schnelle <svens@stackframe.org> +Tested-by: Simon Kirby <sim@hostway.ca> +Tested-by: Stephen M. 
Cameron <scameron@beardog.cce.hp.com> +Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com> +Reported-and-tested-by: Niels Ole Salscheider <niels_ole@salscheider-online.de> +References: https://bugzilla.kernel.org/show_bug.cgi?id=42162 +Signed-off-by: Jon Mason <mason@myri.com> +Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/pci/pci.c | 2 +- + drivers/pci/probe.c | 45 ++++++++++++++++++++++++--------------------- + 2 files changed, 25 insertions(+), 22 deletions(-) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEF + unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; + unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; + +-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; ++enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE; + + /* + * The default CLS is used if arch didn't set CLS explicitly and not +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -1397,34 +1397,37 @@ static void pcie_write_mps(struct pci_de + + static void pcie_write_mrrs(struct pci_dev *dev, int mps) + { +- int rc, mrrs; +- +- if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { +- int dev_mpss = 128 << dev->pcie_mpss; +- +- /* For Max performance, the MRRS must be set to the largest +- * supported value. However, it cannot be configured larger +- * than the MPS the device or the bus can support. This assumes +- * that the largest MRRS available on the device cannot be +- * smaller than the device MPSS. +- */ +- mrrs = mps < dev_mpss ? mps : dev_mpss; +- } else +- /* In the "safe" case, configure the MRRS for fairness on the +- * bus by making all devices have the same size +- */ +- mrrs = mps; ++ int rc, mrrs, dev_mpss; + ++ /* In the "safe" case, do not configure the MRRS. 
There appear to be ++ * issues with setting MRRS to 0 on a number of devices. ++ */ ++ ++ if (pcie_bus_config != PCIE_BUS_PERFORMANCE) ++ return; ++ ++ dev_mpss = 128 << dev->pcie_mpss; ++ ++ /* For Max performance, the MRRS must be set to the largest supported ++ * value. However, it cannot be configured larger than the MPS the ++ * device or the bus can support. This assumes that the largest MRRS ++ * available on the device cannot be smaller than the device MPSS. ++ */ ++ mrrs = min(mps, dev_mpss); + + /* MRRS is a R/W register. Invalid values can be written, but a +- * subsiquent read will verify if the value is acceptable or not. ++ * subsequent read will verify if the value is acceptable or not. + * If the MRRS value provided is not acceptable (e.g., too large), + * shrink the value until it is acceptable to the HW. + */ + while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { ++ dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value" ++ " to %d. If any issues are encountered, please try " ++ "running with pci=pcie_bus_safe\n", mrrs); + rc = pcie_set_readrq(dev, mrrs); + if (rc) +- dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); ++ dev_err(&dev->dev, ++ "Failed attempting to set the MRRS\n"); + + mrrs /= 2; + } +@@ -1437,13 +1440,13 @@ static int pcie_bus_configure_set(struct + if (!pci_is_pcie(dev)) + return 0; + +- dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", ++ dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); + + pcie_write_mps(dev, mps); + pcie_write_mrrs(dev, mps); + +- dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", ++ dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); + + return 0; diff --git a/queue-3.0/pci-series b/queue-3.0/pci-series new file mode 100644 index 0000000000..2d8d8ad280 --- /dev/null +++ b/queue-3.0/pci-series @@ -0,0 +1,4 @@ +pci-set-pci-e-max-payload-size-on-fabric.patch 
+pci-export-pcie_bus_configure_settings-symbol.patch +pci-remove-mrrs-modification-from-mps-setting-code.patch +pci-don-t-crash-when-reading-mpss-from-root-complex.patch diff --git a/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch b/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch new file mode 100644 index 0000000000..6ca45f3804 --- /dev/null +++ b/queue-3.0/pci-set-pci-e-max-payload-size-on-fabric.patch @@ -0,0 +1,445 @@ +From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001 +From: Jon Mason <mason@myri.com> +Date: Wed, 20 Jul 2011 15:20:54 -0500 +Subject: PCI: Set PCI-E Max Payload Size on fabric + +From: Jon Mason <mason@myri.com> + +commit b03e7495a862b028294f59fc87286d6d78ee7fa1 upstream. + +On a given PCI-E fabric, each device, bridge, and root port can have a +different PCI-E maximum payload size. There is a sizable performance +boost for having the largest possible maximum payload size on each PCI-E +device. However, if improperly configured, fatal bus errors can occur. +Thus, it is important to ensure that PCI-E payloads sent by a device +are never larger than the MPS setting of all devices on the way to the +destination. + +This can be achieved two ways: + +- A conservative approach is to use the smallest common denominator of + the entire tree below a root complex for every device on that fabric. + +This means for example that having a 128 bytes MPS USB controller on one +leg of a switch will dramatically reduce the performance of a video card or +10GE adapter on another leg of that same switch. + +It also means that any hierarchy supporting hotplug slots (including +expresscard or thunderbolt I suppose, dbl check that) will have to be +entirely clamped to 128 bytes since we cannot predict what will be +plugged into those slots, and we cannot change the MPS on a "live" +system. 
+ +- A more optimal way is possible, if it falls within a couple of + constraints: +* The top-level host bridge will never generate packets larger than the + smallest TLP (or if it can be controlled independently from its MPS at + least) +* The device will never generate packets larger than MPS (which can be + configured via MRRS) +* No support of direct PCI-E <-> PCI-E transfers between devices without + some additional code to specifically deal with that case + +Then we can use an approach that basically ignores downstream requests +and focuses exclusively on upstream requests. In that case, all we need +to care about is that a device MPS is no larger than its parent MPS, +which allows us to keep all switches/bridges to the max MPS supported by +their parent and eventually the PHB. + +In this case, your USB controller would no longer "starve" your 10GE +Ethernet and your hotplug slots won't affect your global MPS. +Additionally, the hotplugged devices themselves can be configured to a +larger MPS up to the value configured in the hotplug bridge. + +To choose between the two available options, two PCI kernel boot args +have been added to the PCI calls. "pcie_bus_safe" will provide the +former behavior, while "pcie_bus_perf" will perform the latter behavior. +By default, the latter behavior is used. + +NOTE: due to the location of the enablement, each arch will need to add +calls to this function. This patch only enables x86. + +This patch includes a number of changes recommended by Benjamin +Herrenschmidt. 
+ +Tested-by: Jordan_Hargrave@dell.com +Signed-off-by: Jon Mason <mason@myri.com> +Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/x86/pci/acpi.c | 9 ++ + drivers/pci/hotplug/pcihp_slot.c | 45 ------------ + drivers/pci/pci.c | 67 ++++++++++++++++++ + drivers/pci/probe.c | 145 +++++++++++++++++++++++++++++++++++++++ + include/linux/pci.h | 15 +++- + 5 files changed, 236 insertions(+), 45 deletions(-) + +--- a/arch/x86/pci/acpi.c ++++ b/arch/x86/pci/acpi.c +@@ -361,6 +361,15 @@ struct pci_bus * __devinit pci_acpi_scan + } + } + ++ /* After the PCI-E bus has been walked and all devices discovered, ++ * configure any settings of the fabric that might be necessary. ++ */ ++ if (bus) { ++ struct pci_bus *child; ++ list_for_each_entry(child, &bus->children, node) ++ pcie_bus_configure_settings(child, child->self->pcie_mpss); ++ } ++ + if (!bus) + kfree(sd); + +--- a/drivers/pci/hotplug/pcihp_slot.c ++++ b/drivers/pci/hotplug/pcihp_slot.c +@@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci + */ + } + +-/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ +-static int pci_set_payload(struct pci_dev *dev) +-{ +- int pos, ppos; +- u16 pctl, psz; +- u16 dctl, dsz, dcap, dmax; +- struct pci_dev *parent; +- +- parent = dev->bus->self; +- pos = pci_find_capability(dev, PCI_CAP_ID_EXP); +- if (!pos) +- return 0; +- +- /* Read Device MaxPayload capability and setting */ +- pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); +- pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); +- dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; +- dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); +- +- /* Read Parent MaxPayload setting */ +- ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); +- if (!ppos) +- return 0; +- pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); +- psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; +- +- /* If parent payload > device max payload -> error +- * If 
parent payload > device payload -> set speed +- * If parent payload <= device payload -> do nothing +- */ +- if (psz > dmax) +- return -1; +- else if (psz > dsz) { +- dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); +- pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, +- (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + +- (psz << 5)); +- } +- return 0; +-} +- + void pci_configure_slot(struct pci_dev *dev) + { + struct pci_dev *cdev; +@@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev * + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) + return; + +- ret = pci_set_payload(dev); +- if (ret) +- dev_warn(&dev->dev, "could not set device max payload\n"); ++ pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); + + memset(&hpp, 0, sizeof(hpp)); + ret = pci_get_hp_params(dev, &hpp); +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEF + unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; + unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; + ++enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; ++ + /* + * The default CLS is used if arch didn't set CLS explicitly and not + * all pci devices agree on the same value. Arch can override either +@@ -3223,6 +3225,67 @@ out: + EXPORT_SYMBOL(pcie_set_readrq); + + /** ++ * pcie_get_mps - get PCI Express maximum payload size ++ * @dev: PCI device to query ++ * ++ * Returns maximum payload size in bytes ++ * or appropriate error value. 
++ */ ++int pcie_get_mps(struct pci_dev *dev) ++{ ++ int ret, cap; ++ u16 ctl; ++ ++ cap = pci_pcie_cap(dev); ++ if (!cap) ++ return -EINVAL; ++ ++ ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); ++ if (!ret) ++ ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); ++ ++ return ret; ++} ++ ++/** ++ * pcie_set_mps - set PCI Express maximum payload size ++ * @dev: PCI device to query ++ * @rq: maximum payload size in bytes ++ * valid values are 128, 256, 512, 1024, 2048, 4096 ++ * ++ * If possible sets maximum payload size ++ */ ++int pcie_set_mps(struct pci_dev *dev, int mps) ++{ ++ int cap, err = -EINVAL; ++ u16 ctl, v; ++ ++ if (mps < 128 || mps > 4096 || !is_power_of_2(mps)) ++ goto out; ++ ++ v = ffs(mps) - 8; ++ if (v > dev->pcie_mpss) ++ goto out; ++ v <<= 5; ++ ++ cap = pci_pcie_cap(dev); ++ if (!cap) ++ goto out; ++ ++ err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); ++ if (err) ++ goto out; ++ ++ if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) { ++ ctl &= ~PCI_EXP_DEVCTL_PAYLOAD; ++ ctl |= v; ++ err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl); ++ } ++out: ++ return err; ++} ++ ++/** + * pci_select_bars - Make BAR mask from the type of resource + * @dev: the PCI device for which BAR mask is made + * @flags: resource type mask to be selected +@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str) + pci_hotplug_io_size = memparse(str + 9, &str); + } else if (!strncmp(str, "hpmemsize=", 10)) { + pci_hotplug_mem_size = memparse(str + 10, &str); ++ } else if (!strncmp(str, "pcie_bus_safe", 13)) { ++ pcie_bus_config = PCIE_BUS_SAFE; ++ } else if (!strncmp(str, "pcie_bus_perf", 13)) { ++ pcie_bus_config = PCIE_BUS_PERFORMANCE; + } else { + printk(KERN_ERR "PCI: Unknown option `%s'\n", + str); +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -860,6 +860,8 @@ void set_pcie_port_type(struct pci_dev * + pdev->pcie_cap = pos; + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16); + pdev->pcie_type = (reg16 & 
PCI_EXP_FLAGS_TYPE) >> 4; ++ pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16); ++ pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; + } + + void set_pcie_hotplug_bridge(struct pci_dev *pdev) +@@ -1327,6 +1329,149 @@ int pci_scan_slot(struct pci_bus *bus, i + return nr; + } + ++static int pcie_find_smpss(struct pci_dev *dev, void *data) ++{ ++ u8 *smpss = data; ++ ++ if (!pci_is_pcie(dev)) ++ return 0; ++ ++ /* For PCIE hotplug enabled slots not connected directly to a ++ * PCI-E root port, there can be problems when hotplugging ++ * devices. This is due to the possibility of hotplugging a ++ * device into the fabric with a smaller MPS that the devices ++ * currently running have configured. Modifying the MPS on the ++ * running devices could cause a fatal bus error due to an ++ * incoming frame being larger than the newly configured MPS. ++ * To work around this, the MPS for the entire fabric must be ++ * set to the minimum size. Any devices hotplugged into this ++ * fabric will have the minimum MPS set. If the PCI hotplug ++ * slot is directly connected to the root port and there are not ++ * other devices on the fabric (which seems to be the most ++ * common case), then this is not an issue and MPS discovery ++ * will occur as normal. ++ */ ++ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || ++ dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) ++ *smpss = 0; ++ ++ if (*smpss > dev->pcie_mpss) ++ *smpss = dev->pcie_mpss; ++ ++ return 0; ++} ++ ++static void pcie_write_mps(struct pci_dev *dev, int mps) ++{ ++ int rc, dev_mpss; ++ ++ dev_mpss = 128 << dev->pcie_mpss; ++ ++ if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { ++ if (dev->bus->self) { ++ dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", ++ 128 << dev->bus->self->pcie_mpss); ++ ++ /* For "MPS Force Max", the assumption is made that ++ * downstream communication will never be larger than ++ * the MRRS. So, the MPS only needs to be configured ++ * for the upstream communication. 
This being the case, ++ * walk from the top down and set the MPS of the child ++ * to that of the parent bus. ++ */ ++ mps = 128 << dev->bus->self->pcie_mpss; ++ if (mps > dev_mpss) ++ dev_warn(&dev->dev, "MPS configured higher than" ++ " maximum supported by the device. If" ++ " a bus issue occurs, try running with" ++ " pci=pcie_bus_safe.\n"); ++ } ++ ++ dev->pcie_mpss = ffs(mps) - 8; ++ } ++ ++ rc = pcie_set_mps(dev, mps); ++ if (rc) ++ dev_err(&dev->dev, "Failed attempting to set the MPS\n"); ++} ++ ++static void pcie_write_mrrs(struct pci_dev *dev, int mps) ++{ ++ int rc, mrrs; ++ ++ if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { ++ int dev_mpss = 128 << dev->pcie_mpss; ++ ++ /* For Max performance, the MRRS must be set to the largest ++ * supported value. However, it cannot be configured larger ++ * than the MPS the device or the bus can support. This assumes ++ * that the largest MRRS available on the device cannot be ++ * smaller than the device MPSS. ++ */ ++ mrrs = mps < dev_mpss ? mps : dev_mpss; ++ } else ++ /* In the "safe" case, configure the MRRS for fairness on the ++ * bus by making all devices have the same size ++ */ ++ mrrs = mps; ++ ++ ++ /* MRRS is a R/W register. Invalid values can be written, but a ++ * subsiquent read will verify if the value is acceptable or not. ++ * If the MRRS value provided is not acceptable (e.g., too large), ++ * shrink the value until it is acceptable to the HW. 
++ */ ++ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { ++ rc = pcie_set_readrq(dev, mrrs); ++ if (rc) ++ dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); ++ ++ mrrs /= 2; ++ } ++} ++ ++static int pcie_bus_configure_set(struct pci_dev *dev, void *data) ++{ ++ int mps = 128 << *(u8 *)data; ++ ++ if (!pci_is_pcie(dev)) ++ return 0; ++ ++ dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", ++ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); ++ ++ pcie_write_mps(dev, mps); ++ pcie_write_mrrs(dev, mps); ++ ++ dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", ++ pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev)); ++ ++ return 0; ++} ++ ++/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down, ++ * parents then children fashion. If this changes, then this code will not ++ * work as designed. ++ */ ++void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) ++{ ++ u8 smpss = mpss; ++ ++ if (!bus->self) ++ return; ++ ++ if (!pci_is_pcie(bus->self)) ++ return; ++ ++ if (pcie_bus_config == PCIE_BUS_SAFE) { ++ pcie_find_smpss(bus->self, &smpss); ++ pci_walk_bus(bus, pcie_find_smpss, &smpss); ++ } ++ ++ pcie_bus_configure_set(bus->self, &smpss); ++ pci_walk_bus(bus, pcie_bus_configure_set, &smpss); ++} ++ + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) + { + unsigned int devfn, pass, max = bus->secondary; +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -251,7 +251,8 @@ struct pci_dev { + u8 revision; /* PCI revision, low byte of class word */ + u8 hdr_type; /* PCI header type (`multi' flag masked out) */ + u8 pcie_cap; /* PCI-E capability offset */ +- u8 pcie_type; /* PCI-E device/port type */ ++ u8 pcie_type:4; /* PCI-E device/port type */ ++ u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ + u8 rom_base_reg; /* which config register controls the ROM */ + u8 pin; /* which interrupt pin this device uses */ + +@@ -617,6 +618,16 @@ struct pci_driver { + /* these external functions 
are only available when PCI support is enabled */ + #ifdef CONFIG_PCI + ++extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); ++ ++enum pcie_bus_config_types { ++ PCIE_BUS_PERFORMANCE, ++ PCIE_BUS_SAFE, ++ PCIE_BUS_PEER2PEER, ++}; ++ ++extern enum pcie_bus_config_types pcie_bus_config; ++ + extern struct bus_type pci_bus_type; + + /* Do NOT directly access these two variables, unless you are arch specific pci +@@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); + int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); + int pcie_get_readrq(struct pci_dev *dev); + int pcie_set_readrq(struct pci_dev *dev, int rq); ++int pcie_get_mps(struct pci_dev *dev); ++int pcie_set_mps(struct pci_dev *dev, int mps); + int __pci_reset_function(struct pci_dev *dev); + int pci_reset_function(struct pci_dev *dev); + void pci_update_resource(struct pci_dev *dev, int resno); diff --git a/queue-3.0/sched-fix-up-wchan-borkage.patch b/queue-3.0/sched-fix-up-wchan-borkage.patch new file mode 100644 index 0000000000..bc7eff7e6c --- /dev/null +++ b/queue-3.0/sched-fix-up-wchan-borkage.patch @@ -0,0 +1,35 @@ +From 6ebbe7a07b3bc40b168d2afc569a6543c020d2e3 Mon Sep 17 00:00:00 2001 +From: Simon Kirby <sim@hostway.ca> +Date: Thu, 22 Sep 2011 17:03:46 -0700 +Subject: sched: Fix up wchan borkage + +From: Simon Kirby <sim@hostway.ca> + +commit 6ebbe7a07b3bc40b168d2afc569a6543c020d2e3 upstream. + +Commit c259e01a1ec ("sched: Separate the scheduler entry for +preemption") contained a boo-boo wrecking wchan output. It forgot to +put the new schedule() function in the __sched section and thereby +doesn't get properly ignored for things like wchan. 
+ +Tested-by: Simon Kirby <sim@hostway.ca> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/r/20110923000346.GA25425@hostway.ca +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -4335,7 +4335,7 @@ static inline void sched_submit_work(str + blk_schedule_flush_plug(tsk); + } + +-asmlinkage void schedule(void) ++asmlinkage void __sched schedule(void) + { + struct task_struct *tsk = current; + diff --git a/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch b/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch new file mode 100644 index 0000000000..4f3aba4eaa --- /dev/null +++ b/queue-3.0/sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch @@ -0,0 +1,57 @@ +From 3be209a8e22cedafc1b6945608b7bb8d9887ab61 Mon Sep 17 00:00:00 2001 +From: Shawn Bohrer <sbohrer@rgmadvisors.com> +Date: Mon, 12 Sep 2011 09:28:04 -0500 +Subject: sched/rt: Migrate equal priority tasks to available CPUs + +From: Shawn Bohrer <sbohrer@rgmadvisors.com> + +commit 3be209a8e22cedafc1b6945608b7bb8d9887ab61 upstream. + +Commit 43fa5460fe60dea5c610490a1d263415419c60f6 ("sched: Try not to +migrate higher priority RT tasks") also introduced a change in behavior +which keeps RT tasks on the same CPU if there is an equal priority RT +task currently running even if there are empty CPUs available. + +This can cause unnecessary wakeup latencies, and can prevent the +scheduler from balancing all RT tasks across available CPUs. + +This change causes an RT task to search for a new CPU if an equal +priority RT task is already running on wakeup. 
Lower priority tasks +will still have to wait on higher priority tasks, but the system should +still balance out because there is always the possibility that if there +are both a high and low priority RT tasks on a given CPU that the high +priority task could wakeup while the low priority task is running and +force it to search for a better runqueue. + +Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com> +Acked-by: Steven Rostedt <rostedt@goodmis.org> +Tested-by: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/r/1315837684-18733-1-git-send-email-sbohrer@rgmadvisors.com +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + kernel/sched_rt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -1038,7 +1038,7 @@ select_task_rq_rt(struct task_struct *p, + */ + if (curr && unlikely(rt_task(curr)) && + (curr->rt.nr_cpus_allowed < 2 || +- curr->prio < p->prio) && ++ curr->prio <= p->prio) && + (p->rt.nr_cpus_allowed > 1)) { + int target = find_lowest_rq(p); + +@@ -1569,7 +1569,7 @@ static void task_woken_rt(struct rq *rq, + p->rt.nr_cpus_allowed > 1 && + rt_task(rq->curr) && + (rq->curr->rt.nr_cpus_allowed < 2 || +- rq->curr->prio < p->prio)) ++ rq->curr->prio <= p->prio)) + push_rt_tasks(rq); + } + diff --git a/queue-3.0/series b/queue-3.0/series new file mode 100644 index 0000000000..bd2b856cef --- /dev/null +++ b/queue-3.0/series @@ -0,0 +1,9 @@ +pci-set-pci-e-max-payload-size-on-fabric.patch +pci-export-pcie_bus_configure_settings-symbol.patch +pci-remove-mrrs-modification-from-mps-setting-code.patch +pci-don-t-crash-when-reading-mpss-from-root-complex.patch +sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch + +sched-rt-migrate-equal-priority-tasks-to-available-cpus.patch +sched-fix-up-wchan-borkage.patch +ide-disk-fix-request-requeuing.patch diff --git 
a/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch b/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch new file mode 100644 index 0000000000..19527e09e9 --- /dev/null +++ b/queue-3.0/sparc64-force-the-execute-bit-in-openfirmware-s-translation-entries.patch @@ -0,0 +1,58 @@ +From f4142cba4e4065a416e78ade905bea29ff3930e6 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Thu, 29 Sep 2011 12:18:59 -0700 +Subject: sparc64: Force the execute bit in OpenFirmware's translation entries. +Status: RO +Content-Length: 1822 +Lines: 48 + +From: "David S. Miller" <davem@davemloft.net> + +In the OF 'translations' property, the template TTEs in the mappings +never specify the executable bit. This is the case even though some +of these mappings are for OF's code segment. + +Therefore, we need to force the execute bit on in every mapping. + +This problem can only really trigger on Niagara/sun4v machines and the +history behind this is a little complicated. + +Previous to sun4v, the sun4u TTE entries lacked a hardware execute +permission bit. So OF didn't have to ever worry about setting +anything to handle executable pages. Any valid TTE loaded into the +I-TLB would be respected by the chip. + +But sun4v Niagara chips have a real hardware enforced executable bit +in their TTEs. So it has to be set or else the I-TLB throws an +instruction access exception with type code 6 (protection violation). + +We've been extremely fortunate to not get bitten by this in the past. + +The best I can tell is that the OF's mappings for its executable code +were mapped using permanent locked mappings on sun4v in the past. +Therefore, the fact that we didn't have the exec bit set in the OF +translations we would use did not matter in practice. + +Thanks to Greg Onufer for helping me track this down. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/sparc/mm/init_64.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -511,6 +511,11 @@ static void __init read_obp_translations + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data &= ~0x0003fe0000000000UL; + } ++ ++ /* Force execute bit on. */ ++ for (i = 0; i < prom_trans_ents; i++) ++ prom_trans[i].data |= (tlb_type == hypervisor ? ++ _PAGE_EXEC_4V : _PAGE_EXEC_4U); + } + + static void __init hypervisor_tlb_lock(unsigned long vaddr, |