[PATCH] s390: dasd extended error reporting module

The DASD extended error reporting is a facility that allows to get detailed
information about certain problems in the DASD I/O.  This information can be
used to implement fail-over applications that can recover these problems.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Stefan Weinhuber 2006-02-03 03:03:49 -08:00 committed by Linus Torvalds
parent d237bf4926
commit 12c3a54848
8 changed files with 1231 additions and 5 deletions

View file

@ -55,13 +55,21 @@ config DASD_DIAG
Disks under VM. If you are not running under VM or unsure what it is,
say "N".
config DASD_EER
tristate "Extended error reporting (EER)"
depends on DASD
help
This driver provides a character device interface to the
DASD extended error reporting. This is only needed if you want to
use applications written for the EER facility.
config DASD_CMB
tristate "Compatibility interface for DASD channel measurement blocks"
depends on DASD
help
This driver provides an additional interface to the channel measurement
facility, which is normally accessed though sysfs, with a set of
ioctl functions specific to the dasd driver.
This driver provides an additional interface to the channel
measurement facility, which is normally accessed though sysfs, with
a set of ioctl functions specific to the dasd driver.
This is only needed if you want to use applications written for
linux-2.4 dasd channel measurement facility interface.

View file

@ -5,6 +5,7 @@
dasd_eckd_mod-objs := dasd_eckd.o dasd_3990_erp.o dasd_9343_erp.o
dasd_fba_mod-objs := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o
dasd_diag_mod-objs := dasd_diag.o
dasd_eer_mod-objs := dasd_eer.o
dasd_mod-objs := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \
dasd_genhd.o dasd_erp.o
@ -13,5 +14,6 @@ obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o
obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o
obj-$(CONFIG_DASD_FBA) += dasd_fba_mod.o
obj-$(CONFIG_DASD_CMB) += dasd_cmb.o
obj-$(CONFIG_DASD_EER) += dasd_eer.o
obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o
obj-$(CONFIG_DCSSBLK) += dcssblk.o

View file

@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/hdreg.h>
#include <linux/notifier.h>
#include <asm/ccwdev.h>
#include <asm/ebcdic.h>
@ -57,6 +58,7 @@ static void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *);
static void dasd_flush_ccw_queue(struct dasd_device *, int);
static void dasd_tasklet(struct dasd_device *);
static void do_kick_device(void *data);
static void dasd_disable_eer(struct dasd_device *device);
/*
* SECTION: Operations on the device structure.
@ -151,6 +153,8 @@ dasd_state_new_to_known(struct dasd_device *device)
static inline void
dasd_state_known_to_new(struct dasd_device * device)
{
/* disable extended error reporting for this device */
dasd_disable_eer(device);
/* Forget the discipline information. */
device->discipline = NULL;
device->state = DASD_STATE_NEW;
@ -867,6 +871,9 @@ dasd_handle_state_change_pending(struct dasd_device *device)
struct dasd_ccw_req *cqr;
struct list_head *l, *n;
/* first of all call extended error reporting */
dasd_write_eer_trigger(DASD_EER_STATECHANGE, device, NULL);
device->stopped &= ~DASD_STOPPED_PENDING;
/* restart all 'running' IO on queue */
@ -1086,6 +1093,19 @@ restart:
}
goto restart;
}
/* first of all call extended error reporting */
if (device->eer && cqr->status == DASD_CQR_FAILED) {
dasd_write_eer_trigger(DASD_EER_FATALERROR,
device, cqr);
/* restart request */
cqr->status = DASD_CQR_QUEUED;
cqr->retries = 255;
device->stopped |= DASD_STOPPED_QUIESCE;
goto restart;
}
/* Process finished ERP request. */
if (cqr->refers) {
__dasd_process_erp(device, cqr);
@ -1223,7 +1243,8 @@ __dasd_start_head(struct dasd_device * device)
cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
/* check FAILFAST */
if (device->stopped & ~DASD_STOPPED_PENDING &&
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) {
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
(!device->eer)) {
cqr->status = DASD_CQR_FAILED;
dasd_schedule_bh(device);
}
@ -1965,6 +1986,9 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
switch (event) {
case CIO_GONE:
case CIO_NO_PATH:
/* first of all call extended error reporting */
dasd_write_eer_trigger(DASD_EER_NOPATH, device, NULL);
if (device->state < DASD_STATE_BASIC)
break;
/* Device is active. We want to keep it. */
@ -2022,6 +2046,51 @@ dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver)
put_driver(drv);
}
/*
* notifications for extended error reports
*/
static struct notifier_block *dasd_eer_chain;
int
dasd_register_eer_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&dasd_eer_chain, nb);
}
int
dasd_unregister_eer_notifier(struct notifier_block *nb)
{
return notifier_chain_unregister(&dasd_eer_chain, nb);
}
/*
* Notify the registered error reporting module of a problem
*/
void
dasd_write_eer_trigger(unsigned int id, struct dasd_device *device,
struct dasd_ccw_req *cqr)
{
if (device->eer) {
struct dasd_eer_trigger temp;
temp.id = id;
temp.device = device;
temp.cqr = cqr;
notifier_call_chain(&dasd_eer_chain, DASD_EER_TRIGGER,
(void *)&temp);
}
}
/*
* Tell the registered error reporting module to disable error reporting for
* a given device and to cleanup any private data structures on that device.
*/
static void
dasd_disable_eer(struct dasd_device *device)
{
notifier_call_chain(&dasd_eer_chain, DASD_EER_DISABLE, (void *)device);
}
static int __init
dasd_init(void)
{
@ -2103,6 +2172,11 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online);
EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
EXPORT_SYMBOL_GPL(dasd_generic_auto_online);
EXPORT_SYMBOL(dasd_register_eer_notifier);
EXPORT_SYMBOL(dasd_unregister_eer_notifier);
EXPORT_SYMBOL(dasd_write_eer_trigger);
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically

View file

@ -1108,6 +1108,9 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
case 0x0B:
DEV_MESSAGE(KERN_WARNING, device, "%s",
"FORMAT F - Volume is suspended duplex");
/* call extended error reporting (EER) */
dasd_write_eer_trigger(DASD_EER_PPRCSUSPEND, device,
erp->refers);
break;
case 0x0C:
DEV_MESSAGE(KERN_WARNING, device, "%s",

View file

@ -29,6 +29,7 @@
#define DASD_ECKD_CCW_PSF 0x27
#define DASD_ECKD_CCW_RSSD 0x3e
#define DASD_ECKD_CCW_LOCATE_RECORD 0x47
#define DASD_ECKD_CCW_SNSS 0x54
#define DASD_ECKD_CCW_DEFINE_EXTENT 0x63
#define DASD_ECKD_CCW_WRITE_MT 0x85
#define DASD_ECKD_CCW_READ_MT 0x86

File diff suppressed because it is too large Load diff

View file

@ -275,6 +275,34 @@ struct dasd_discipline {
extern struct dasd_discipline *dasd_diag_discipline_pointer;
/*
* Notification numbers for extended error reporting notifications:
* The DASD_EER_DISABLE notification is sent before a dasd_device (and it's
* eer pointer) is freed. The error reporting module needs to do all necessary
* cleanup steps.
* The DASD_EER_TRIGGER notification sends the actual error reports (triggers).
*/
#define DASD_EER_DISABLE 0
#define DASD_EER_TRIGGER 1
/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */
#define DASD_EER_FATALERROR 1
#define DASD_EER_NOPATH 2
#define DASD_EER_STATECHANGE 3
#define DASD_EER_PPRCSUSPEND 4
/*
* The dasd_eer_trigger structure contains all data that we need to send
* along with an DASD_EER_TRIGGER notification.
*/
struct dasd_eer_trigger {
unsigned int id;
struct dasd_device *device;
struct dasd_ccw_req *cqr;
};
struct dasd_device {
/* Block device stuff. */
struct gendisk *gdp;
@ -288,6 +316,9 @@ struct dasd_device {
unsigned long flags; /* per device flags */
unsigned short features; /* copy of devmap-features (read-only!) */
/* extended error reporting stuff (eer) */
void *eer;
/* Device discipline stuff. */
struct dasd_discipline *discipline;
char *private;
@ -488,6 +519,12 @@ int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *);
int dasd_generic_set_offline (struct ccw_device *cdev);
int dasd_generic_notify(struct ccw_device *, int);
void dasd_generic_auto_online (struct ccw_driver *);
int dasd_register_eer_notifier(struct notifier_block *);
int dasd_unregister_eer_notifier(struct notifier_block *);
void dasd_write_eer_trigger(unsigned int , struct dasd_device *,
struct dasd_ccw_req *);
/* externals in dasd_devmap.c */
extern int dasd_max_devindex;

View file

@ -204,7 +204,8 @@ typedef struct attrib_data_t {
*
* Here ist how the ioctl-nr should be used:
* 0 - 31 DASD driver itself
* 32 - 239 still open
* 32 - 229 still open
* 230 - 239 DASD extended error reporting
* 240 - 255 reserved for EMC
*******************************************************************************/
@ -236,12 +237,22 @@ typedef struct attrib_data_t {
#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
/* Get Attributes (cache operations) */
#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t)
/* retrieve extended error-reporting value */
#define BIODASDEERGET _IOR(DASD_IOCTL_LETTER,6,int)
/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t)
/* Set Attributes (cache operations) */
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
/* retrieve extended error-reporting value */
#define BIODASDEERSET _IOW(DASD_IOCTL_LETTER,3,int)
/* remove all records from the eer buffer */
#define DASD_EER_PURGE _IO(DASD_IOCTL_LETTER,230)
/* set the number of pages that are used for the internal eer buffer */
#define DASD_EER_SETBUFSIZE _IOW(DASD_IOCTL_LETTER,230,int)
#endif /* DASD_H */