block: Allow devices to indicate whether discarded blocks are zeroed

The discard ioctl is used by mkfs utilities to clear a block device
prior to putting metadata down.  However, not all devices return zeroed
blocks after a discard.  Some drives return stale data, potentially
containing old superblocks.  It is therefore important to know whether
discarded blocks are properly zeroed.

Both ATA and SCSI drives have configuration bits that indicate whether
zeroes are returned after a discard operation.  Implement a block level
interface that allows this information to be bubbled up the stack and
queried via a new block device ioctl.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
Martin K. Petersen 2009-12-03 09:24:48 +01:00 committed by Jens Axboe
parent 464191c65b
commit 98262f2762
6 changed files with 32 additions and 0 deletions

View file

@ -101,6 +101,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->discard_granularity = 0; lim->discard_granularity = 0;
lim->discard_alignment = 0; lim->discard_alignment = 0;
lim->discard_misaligned = 0; lim->discard_misaligned = 0;
lim->discard_zeroes_data = -1;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0; lim->alignment_offset = 0;
@ -544,6 +545,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->io_min = max(t->io_min, b->io_min); t->io_min = max(t->io_min, b->io_min);
t->no_cluster |= b->no_cluster; t->no_cluster |= b->no_cluster;
t->discard_zeroes_data &= b->discard_zeroes_data;
/* Bottom device offset aligned? */ /* Bottom device offset aligned? */
if (offset && if (offset &&

View file

@ -136,6 +136,11 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
return queue_var_show(q->limits.max_discard_sectors << 9, page); return queue_var_show(q->limits.max_discard_sectors << 9, page);
} }
/*
 * Show handler used by the "discard_zeroes_data" queue sysfs entry.
 *
 * Fetches the value reported by queue_discard_zeroes_data() for @q and
 * hands it to queue_var_show() to be written into @page.  The return
 * value is whatever queue_var_show() returns.
 */
static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
{
	unsigned int zeroes = queue_discard_zeroes_data(q);

	return queue_var_show(zeroes, page);
}
static ssize_t static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{ {
@ -313,6 +318,11 @@ static struct queue_sysfs_entry queue_discard_max_entry = {
.show = queue_discard_max_show, .show = queue_discard_max_show,
}; };
/*
 * Queue sysfs entry named "discard_zeroes_data".  It is registered with
 * mode S_IRUGO and only a show handler; no store handler is set here.
 */
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
	.attr = {
		.name = "discard_zeroes_data",
		.mode = S_IRUGO,
	},
	.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = { static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_nonrot_show, .show = queue_nonrot_show,
@ -350,6 +360,7 @@ static struct attribute *default_attrs[] = {
&queue_io_opt_entry.attr, &queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr, &queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr, &queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_nonrot_entry.attr, &queue_nonrot_entry.attr,
&queue_nomerges_entry.attr, &queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr, &queue_rq_affinity_entry.attr,

View file

@ -747,6 +747,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return compat_put_uint(arg, bdev_io_opt(bdev)); return compat_put_uint(arg, bdev_io_opt(bdev));
case BLKALIGNOFF: case BLKALIGNOFF:
return compat_put_int(arg, bdev_alignment_offset(bdev)); return compat_put_int(arg, bdev_alignment_offset(bdev));
case BLKDISCARDZEROES:
return compat_put_uint(arg, bdev_discard_zeroes_data(bdev));
case BLKFLSBUF: case BLKFLSBUF:
case BLKROSET: case BLKROSET:
case BLKDISCARD: case BLKDISCARD:

View file

@ -280,6 +280,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return put_uint(arg, bdev_io_opt(bdev)); return put_uint(arg, bdev_io_opt(bdev));
case BLKALIGNOFF: case BLKALIGNOFF:
return put_int(arg, bdev_alignment_offset(bdev)); return put_int(arg, bdev_alignment_offset(bdev));
case BLKDISCARDZEROES:
return put_uint(arg, bdev_discard_zeroes_data(bdev));
case BLKSECTGET: case BLKSECTGET:
return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
case BLKRASET: case BLKRASET:

View file

@ -322,6 +322,7 @@ struct queue_limits {
unsigned char misaligned; unsigned char misaligned;
unsigned char discard_misaligned; unsigned char discard_misaligned;
unsigned char no_cluster; unsigned char no_cluster;
signed char discard_zeroes_data;
}; };
struct request_queue struct request_queue
@ -1150,6 +1151,19 @@ static inline int queue_sector_discard_alignment(struct request_queue *q,
& (q->limits.discard_granularity - 1); & (q->limits.discard_granularity - 1);
} }
/*
 * Report the queue's discard_zeroes_data limit as a strict 0/1 value.
 *
 * Returns 1 only when q->limits.discard_zeroes_data is exactly 1.  Every
 * other value, including the -1 assigned by blk_set_default_limits(),
 * is reported as 0.
 */
static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
{
	return (q->limits.discard_zeroes_data == 1) ? 1 : 0;
}
/*
 * Block-device convenience wrapper: obtain the request queue for @bdev
 * via bdev_get_queue() and return queue_discard_zeroes_data() for it.
 */
static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return queue_discard_zeroes_data(q);
}
static inline int queue_dma_alignment(struct request_queue *q) static inline int queue_dma_alignment(struct request_queue *q)
{ {
return q ? q->dma_alignment : 511; return q ? q->dma_alignment : 511;

View file

@ -304,6 +304,7 @@ struct inodes_stat_t {
#define BLKIOOPT _IO(0x12,121) #define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122) #define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123) #define BLKPBSZGET _IO(0x12,123)
/* Query ioctl: blkdev_ioctl() stores bdev_discard_zeroes_data() (0 or 1) as an unsigned int at arg. */
#define BLKDISCARDZEROES _IO(0x12,124)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */ #define FIBMAP _IO(0x00,1) /* bmap access */