diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 1abfdba8d99..31738627ec6 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -44,6 +44,12 @@ #include "libata.h" +enum { + ATA_EH_SPDN_NCQ_OFF = (1 << 0), + ATA_EH_SPDN_SPEED_DOWN = (1 << 1), + ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), +}; + static void __ata_port_freeze(struct ata_port *ap); static void ata_eh_finish(struct ata_port *ap); static void ata_eh_handle_port_suspend(struct ata_port *ap); @@ -65,12 +71,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io, ent->timestamp = get_jiffies_64(); } -static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering) +static void ata_ering_clear(struct ata_ering *ering) { - struct ata_ering_entry *ent = &ering->ring[ering->cursor]; - if (!ent->err_mask) - return NULL; - return ent; + memset(ering, 0, sizeof(*ering)); } static int ata_ering_map(struct ata_ering *ering, @@ -1159,87 +1162,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return action; } -static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent) +static int ata_eh_categorize_error(int is_io, unsigned int err_mask) { - if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT)) + if (err_mask & AC_ERR_ATA_BUS) return 1; - if (ent->is_io) { - if (ent->err_mask & AC_ERR_HSM) - return 1; - if ((ent->err_mask & - (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) + if (err_mask & AC_ERR_TIMEOUT) + return 2; + + if (is_io) { + if (err_mask & AC_ERR_HSM) return 2; + if ((err_mask & + (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) + return 3; } return 0; } -struct speed_down_needed_arg { +struct speed_down_verdict_arg { u64 since; - int nr_errors[3]; + int nr_errors[4]; }; -static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg) +static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) { - struct speed_down_needed_arg *arg = void_arg; + struct speed_down_verdict_arg *arg = void_arg; + int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); if (ent->timestamp < arg->since) return -1; - arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++; + arg->nr_errors[cat]++; return 0; } /** - * ata_eh_speed_down_needed - Determine wheter speed down is necessary + * ata_eh_speed_down_verdict - Determine speed down verdict * @dev: Device of interest * * This function examines error ring of @dev and determines - * whether speed down is necessary. Speed down is necessary if - * there have been more than 3 of Cat-1 errors or 10 of Cat-2 - * errors during last 15 minutes. + * whether NCQ needs to be turned off, transfer speed should be + * stepped down, or falling back to PIO is necessary. * - * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM - * violation for known supported commands. + * Cat-1 is ATA_BUS error for any command. * - * Cat-2 errors are unclassified DEV error for known supported + * Cat-2 is TIMEOUT for any command or HSM violation for known + * supported commands. + * + * Cat-3 is is unclassified DEV error for known supported * command. * + * NCQ needs to be turned off if there have been more than 3 + * Cat-2 + Cat-3 errors during last 10 minutes. + * + * Speed down is necessary if there have been more than 3 Cat-1 + + * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. + * + * Falling back to PIO mode is necessary if there have been more + * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. + * * LOCKING: * Inherited from caller. * * RETURNS: - * 1 if speed down is necessary, 0 otherwise + * OR of ATA_EH_SPDN_* flags. */ -static int ata_eh_speed_down_needed(struct ata_device *dev) +static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) { - const u64 interval = 15LLU * 60 * HZ; - static const int err_limits[3] = { -1, 3, 10 }; - struct speed_down_needed_arg arg; - struct ata_ering_entry *ent; - int err_cat; - u64 j64; - - ent = ata_ering_top(&dev->ering); - if (!ent) - return 0; - - err_cat = ata_eh_categorize_ering_entry(ent); - if (err_cat == 0) - return 0; + const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; + u64 j64 = get_jiffies_64(); + struct speed_down_verdict_arg arg; + unsigned int verdict = 0; + /* scan past 10 mins of error history */ memset(&arg, 0, sizeof(arg)); + arg.since = j64 - min(j64, j10mins); + ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - j64 = get_jiffies_64(); - if (j64 >= interval) - arg.since = j64 - interval; - else - arg.since = 0; + if (arg.nr_errors[2] + arg.nr_errors[3] > 3) + verdict |= ATA_EH_SPDN_NCQ_OFF; + if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) + verdict |= ATA_EH_SPDN_SPEED_DOWN; - ata_ering_map(&dev->ering, speed_down_needed_cb, &arg); + /* scan past 3 mins of error history */ + memset(&arg, 0, sizeof(arg)); + arg.since = j64 - min(j64, j5mins); + ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); - return arg.nr_errors[err_cat] > err_limits[err_cat]; + if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) + verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; + + return verdict; } /** @@ -1257,31 +1272,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev) * Kernel thread context (may sleep). * * RETURNS: - * 0 on success, -errno otherwise + * Determined recovery action. */ -static int ata_eh_speed_down(struct ata_device *dev, int is_io, - unsigned int err_mask) +static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, + unsigned int err_mask) { - if (!err_mask) + unsigned int verdict; + unsigned int action = 0; + + /* don't bother if Cat-0 error */ + if (ata_eh_categorize_error(is_io, err_mask) == 0) return 0; /* record error and determine whether speed down is necessary */ ata_ering_record(&dev->ering, is_io, err_mask); + verdict = ata_eh_speed_down_verdict(dev); - if (!ata_eh_speed_down_needed(dev)) - return 0; + /* turn off NCQ? */ + if ((verdict & ATA_EH_SPDN_NCQ_OFF) && + (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | + ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { + dev->flags |= ATA_DFLAG_NCQ_OFF; + ata_dev_printk(dev, KERN_WARNING, + "NCQ disabled due to excessive errors\n"); + goto done; + } - /* speed down SATA link speed if possible */ - if (sata_down_spd_limit(dev->ap) == 0) - return ATA_EH_HARDRESET; + /* speed down? */ + if (verdict & ATA_EH_SPDN_SPEED_DOWN) { + /* speed down SATA link speed if possible */ + if (sata_down_spd_limit(dev->ap) == 0) { + action |= ATA_EH_HARDRESET; + goto done; + } - /* lower transfer mode */ - if (ata_down_xfermask_limit(dev, ATA_DNXFER_ANY) == 0) - return ATA_EH_SOFTRESET; + /* lower transfer mode */ + if (dev->spdn_cnt < 2) { + static const int dma_dnxfer_sel[] = + { ATA_DNXFER_DMA, ATA_DNXFER_40C }; + static const int pio_dnxfer_sel[] = + { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; + int sel; + + if (dev->xfer_shift != ATA_SHIFT_PIO) + sel = dma_dnxfer_sel[dev->spdn_cnt]; + else + sel = pio_dnxfer_sel[dev->spdn_cnt]; + + dev->spdn_cnt++; + + if (ata_down_xfermask_limit(dev, sel) == 0) { + action |= ATA_EH_SOFTRESET; + goto done; + } + } + } + + /* Fall back to PIO? Slowing down to PIO is meaningless for + * SATA. Consider it only for PATA. + */ + if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && + (dev->ap->cbl != ATA_CBL_SATA) && + (dev->xfer_shift != ATA_SHIFT_PIO)) { + if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { + dev->spdn_cnt = 0; + action |= ATA_EH_SOFTRESET; + goto done; + } + } - ata_dev_printk(dev, KERN_ERR, - "speed down requested but no transfer mode left\n"); return 0; + done: + /* device has been slowed down, blow error history */ + ata_ering_clear(&dev->ering); + return action; } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index 60dfc5f7e88..5db50fa594a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -495,6 +495,7 @@ struct ata_device { /* error history */ struct ata_ering ering; + int spdn_cnt; unsigned int horkage; /* List of broken features */ #ifdef CONFIG_SATA_ACPI /* ACPI objects info */