aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/s390/block/dasd.c
diff options
context:
space:
mode:
authorStefan Haberland <sth@linux.vnet.ibm.com>2016-08-08 15:56:54 +0200
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2016-12-12 12:05:03 +0100
commita521b048bc8c5d3c57a468c2cba70eb60e873616 (patch)
treed84e42b5653a1761a5cf7307ab4fef0e884f1c07 /drivers/s390/block/dasd.c
parentc93461515a1a16486f4e483cb34170366fa73ea1 (diff)
downloadlinux-a521b048bc8c5d3c57a468c2cba70eb60e873616.tar.gz
s390/dasd: channel path aware error recovery
With this feature, the DASD device driver more robustly handles DASDs that are attached via multiple channel paths and are subject to constant Interface-Control-Checks (IFCCs) and Channel-Control-Checks (CCCs) or loss of High-Performance-FICON (HPF) functionality on one or more of these paths. If a channel path does not work correctly, it is removed from normal operation as long as other channel paths are available. All extended error recovery states can be queried and reset via user space interfaces. Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com> Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Jan Hoeppner <hoeppner@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'drivers/s390/block/dasd.c')
-rw-r--r--drivers/s390/block/dasd.c149
1 files changed, 111 insertions, 38 deletions
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 13a337faef43f..0e3fdfdbd0984 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -69,6 +69,7 @@ static void dasd_block_tasklet(struct dasd_block *);
static void do_kick_device(struct work_struct *);
static void do_restore_device(struct work_struct *);
static void do_reload_device(struct work_struct *);
+static void do_requeue_requests(struct work_struct *);
static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *);
static void dasd_device_timeout(unsigned long);
static void dasd_block_timeout(unsigned long);
@@ -125,6 +126,7 @@ struct dasd_device *dasd_alloc_device(void)
INIT_WORK(&device->kick_work, do_kick_device);
INIT_WORK(&device->restore_device, do_restore_device);
INIT_WORK(&device->reload_device, do_reload_device);
+ INIT_WORK(&device->requeue_requests, do_requeue_requests);
device->state = DASD_STATE_NEW;
device->target = DASD_STATE_NEW;
mutex_init(&device->state_mutex);
@@ -1622,6 +1624,13 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
}
EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
+static int dasd_check_hpf_error(struct irb *irb)
+{
+ return (scsw_tm_is_valid_schxs(&irb->scsw) &&
+ (irb->scsw.tm.sesq == SCSW_SESQ_DEV_NOFCX ||
+ irb->scsw.tm.sesq == SCSW_SESQ_PATH_NOFCX));
+}
+
/*
* Interrupt handler for "normal" ssch-io based dasd devices.
*/
@@ -1748,6 +1757,13 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
struct dasd_ccw_req, devlist);
}
} else { /* error */
+ /* check for HPF error
+ * call discipline function to requeue all requests
+ * and disable HPF accordingly
+ */
+ if (cqr->cpmode && dasd_check_hpf_error(irb) &&
+ device->discipline->handle_hpf_error)
+ device->discipline->handle_hpf_error(device, irb);
/*
* If we don't want complex ERP for this request, then just
* reset this and retry it in the fastpath
@@ -2924,10 +2940,10 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
if (!block)
return -EINVAL;
- spin_lock_irqsave(&block->queue_lock, flags);
+ spin_lock_irqsave(&block->request_queue_lock, flags);
req = (struct request *) cqr->callback_data;
blk_requeue_request(block->request_queue, req);
- spin_unlock_irqrestore(&block->queue_lock, flags);
+ spin_unlock_irqrestore(&block->request_queue_lock, flags);
return 0;
}
@@ -3701,7 +3717,7 @@ EXPORT_SYMBOL_GPL(dasd_generic_notify);
void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
{
struct dasd_device *device;
- int chp, oldopm;
+ int chp, oldopm, hpfpm, ifccpm;
device = dasd_device_from_cdev_locked(cdev);
if (IS_ERR(device))
@@ -3733,7 +3749,30 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
device->discipline->kick_validate(device);
}
}
- if (oldopm && !dasd_path_get_opm(device)) {
+ hpfpm = dasd_path_get_hpfpm(device);
+ ifccpm = dasd_path_get_ifccpm(device);
+ if (!dasd_path_get_opm(device) && hpfpm) {
+ /*
+ * device has no operational paths but at least one path is
+ * disabled due to HPF errors
+ * disable HPF at all and use the path(s) again
+ */
+ if (device->discipline->disable_hpf)
+ device->discipline->disable_hpf(device);
+ dasd_device_set_stop_bits(device, DASD_STOPPED_NOT_ACC);
+ dasd_path_set_tbvpm(device, hpfpm);
+ dasd_schedule_device_bh(device);
+ dasd_schedule_requeue(device);
+ } else if (!dasd_path_get_opm(device) && ifccpm) {
+ /*
+ * device has no operational paths but at least one path is
+ * disabled due to IFCC errors
+ * trigger path verification on paths with IFCC errors
+ */
+ dasd_path_set_tbvpm(device, ifccpm);
+ dasd_schedule_device_bh(device);
+ }
+ if (oldopm && !dasd_path_get_opm(device) && !hpfpm && !ifccpm) {
dev_warn(&device->cdev->dev,
"No verified channel paths remain for the device\n");
DBF_DEV_EVENT(DBF_WARNING, device,
@@ -3757,30 +3796,18 @@ int dasd_generic_verify_path(struct dasd_device *device, __u8 lpm)
}
EXPORT_SYMBOL_GPL(dasd_generic_verify_path);
-
-int dasd_generic_pm_freeze(struct ccw_device *cdev)
+/*
+ * clear active requests and requeue them to block layer if possible
+ */
+static int dasd_generic_requeue_all_requests(struct dasd_device *device)
{
- struct dasd_device *device = dasd_device_from_cdev(cdev);
- struct list_head freeze_queue;
+ struct list_head requeue_queue;
struct dasd_ccw_req *cqr, *n;
struct dasd_ccw_req *refers;
int rc;
- if (IS_ERR(device))
- return PTR_ERR(device);
-
- /* mark device as suspended */
- set_bit(DASD_FLAG_SUSPENDED, &device->flags);
-
- if (device->discipline->freeze)
- rc = device->discipline->freeze(device);
-
- /* disallow new I/O */
- dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
-
- /* clear active requests and requeue them to block layer if possible */
- INIT_LIST_HEAD(&freeze_queue);
- spin_lock_irq(get_ccwdev_lock(cdev));
+ INIT_LIST_HEAD(&requeue_queue);
+ spin_lock_irq(get_ccwdev_lock(device->cdev));
rc = 0;
list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
/* Check status and move request to flush_queue */
@@ -3791,25 +3818,22 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
dev_err(&device->cdev->dev,
"Unable to terminate request %p "
"on suspend\n", cqr);
- spin_unlock_irq(get_ccwdev_lock(cdev));
+ spin_unlock_irq(get_ccwdev_lock(device->cdev));
dasd_put_device(device);
return rc;
}
}
- list_move_tail(&cqr->devlist, &freeze_queue);
+ list_move_tail(&cqr->devlist, &requeue_queue);
}
- spin_unlock_irq(get_ccwdev_lock(cdev));
+ spin_unlock_irq(get_ccwdev_lock(device->cdev));
- list_for_each_entry_safe(cqr, n, &freeze_queue, devlist) {
+ list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
wait_event(dasd_flush_wq,
(cqr->status != DASD_CQR_CLEAR_PENDING));
- if (cqr->status == DASD_CQR_CLEARED)
- cqr->status = DASD_CQR_QUEUED;
- /* requeue requests to blocklayer will only work for
- block device requests */
- if (_dasd_requeue_request(cqr))
- continue;
+ /* mark sleepon requests as ended */
+ if (cqr->callback_data == DASD_SLEEPON_START_TAG)
+ cqr->callback_data = DASD_SLEEPON_END_TAG;
/* remove requests from device and block queue */
list_del_init(&cqr->devlist);
@@ -3821,6 +3845,14 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
dasd_free_erp_request(cqr, cqr->memdev);
cqr = refers;
}
+
+ /*
+ * requeue requests to blocklayer will only work
+ * for block device requests
+ */
+ if (_dasd_requeue_request(cqr))
+ continue;
+
if (cqr->block)
list_del_init(&cqr->blocklist);
cqr->block->base->discipline->free_cp(
@@ -3831,15 +3863,56 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
* if requests remain then they are internal request
* and go back to the device queue
*/
- if (!list_empty(&freeze_queue)) {
+ if (!list_empty(&requeue_queue)) {
/* move freeze_queue to start of the ccw_queue */
- spin_lock_irq(get_ccwdev_lock(cdev));
- list_splice_tail(&freeze_queue, &device->ccw_queue);
- spin_unlock_irq(get_ccwdev_lock(cdev));
+ spin_lock_irq(get_ccwdev_lock(device->cdev));
+ list_splice_tail(&requeue_queue, &device->ccw_queue);
+ spin_unlock_irq(get_ccwdev_lock(device->cdev));
}
- dasd_put_device(device);
+ /* wake up generic waitqueue for eventually ended sleepon requests */
+ wake_up(&generic_waitq);
return rc;
}
+
+static void do_requeue_requests(struct work_struct *work)
+{
+ struct dasd_device *device = container_of(work, struct dasd_device,
+ requeue_requests);
+ dasd_generic_requeue_all_requests(device);
+ dasd_device_remove_stop_bits(device, DASD_STOPPED_NOT_ACC);
+ if (device->block)
+ dasd_schedule_block_bh(device->block);
+ dasd_put_device(device);
+}
+
+void dasd_schedule_requeue(struct dasd_device *device)
+{
+ dasd_get_device(device);
+ /* queue call to dasd_reload_device to the kernel event daemon. */
+ if (!schedule_work(&device->requeue_requests))
+ dasd_put_device(device);
+}
+EXPORT_SYMBOL(dasd_schedule_requeue);
+
+int dasd_generic_pm_freeze(struct ccw_device *cdev)
+{
+ struct dasd_device *device = dasd_device_from_cdev(cdev);
+ int rc;
+
+ if (IS_ERR(device))
+ return PTR_ERR(device);
+
+ /* mark device as suspended */
+ set_bit(DASD_FLAG_SUSPENDED, &device->flags);
+
+ if (device->discipline->freeze)
+ rc = device->discipline->freeze(device);
+
+ /* disallow new I/O */
+ dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
+
+ return dasd_generic_requeue_all_requests(device);
+}
EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze);
int dasd_generic_restore_device(struct ccw_device *cdev)