s390/cio: recover from bad paths

In some situations we don't receive notification from firmware that
a previously unusable channelpath is usable again.

Schedule recovery for devices that return from path verification
without using all potentially usable paths. The recovery thread will
periodically trigger a path verification on the affected devices.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Suggested-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Sebastian Ott 2017-09-14 13:55:22 +02:00 committed by Martin Schwidefsky
parent c8b8502415
commit 55fb734757
4 changed files with 24 additions and 3 deletions

View file

@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev)
static int recovery_check(struct device *dev, void *data)
{
struct ccw_device *cdev = to_ccwdev(dev);
struct subchannel *sch;
int *redo = data;
spin_lock_irq(cdev->ccwlock);
switch (cdev->private->state) {
case DEV_STATE_ONLINE:
sch = to_subchannel(cdev->dev.parent);
if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
break;
/* fall through */
case DEV_STATE_DISCONNECTED:
CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
cdev->private->dev_id.ssid,
@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused)
}
spin_unlock_irq(&recovery_lock);
} else
CIO_MSG_EVENT(4, "recovery: end\n");
CIO_MSG_EVENT(3, "recovery: end\n");
}
static DECLARE_WORK(recovery_work, recovery_work_func);
@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data)
schedule_work(&recovery_work);
}
static void ccw_device_schedule_recovery(void)
void ccw_device_schedule_recovery(void)
{
unsigned long flags;
CIO_MSG_EVENT(4, "recovery: schedule\n");
CIO_MSG_EVENT(3, "recovery: schedule\n");
spin_lock_irqsave(&recovery_lock, flags);
if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
recovery_phase = 0;

View file

@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev);
void ccw_device_set_notoper(struct ccw_device *cdev);
void ccw_device_set_timeout(struct ccw_device *, int);
void ccw_device_schedule_recovery(void);
/* Channel measurement facility related */
void retry_set_schib(struct ccw_device *cdev);

View file

@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type)
}
}
static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
{
struct subchannel *sch = to_subchannel(cdev->dev.parent);
u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
ccw_device_schedule_recovery();
cdev->private->path_broken_mask = broken_paths;
}
void ccw_device_verify_done(struct ccw_device *cdev, int err)
{
struct subchannel *sch;
@ -508,6 +519,7 @@ callback:
memset(&cdev->private->irb, 0, sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
ccw_device_handle_broken_paths(cdev);
break;
case -ETIME:
case -EUSERS:

View file

@ -131,6 +131,8 @@ struct ccw_device_private {
not operable */
u8 path_gone_mask; /* mask of paths, that became unavailable */
u8 path_new_mask; /* mask of paths, that became available */
u8 path_broken_mask; /* mask of paths, which were found to be
unusable */
struct {
unsigned int fast:1; /* post with "channel end" */
unsigned int repall:1; /* report every interrupt status */