mirror of
https://github.com/adulau/aha.git
synced 2024-12-27 19:26:25 +00:00
md: Support write-intent bitmaps with externally managed metadata.
In this case, the metadata must not be in the same sector as the bitmap. md will not read or write any bitmap metadata. Configuration must be done via sysfs, and when a recovery makes the array non-degraded again, writing 'true' to 'bitmap/can_clear' will allow bits in the bitmap to be cleared again. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
624ce4f565
commit
ece5cff0da
4 changed files with 137 additions and 33 deletions
|
@ -322,6 +322,22 @@ All md devices contain:
|
||||||
'backlog' sets a limit on the number of concurrent background
|
'backlog' sets a limit on the number of concurrent background
|
||||||
writes. If there are more than this, new writes will be
|
writes. If there are more than this, new writes will be
|
||||||
synchronous.
|
synchronous.
|
||||||
|
bitmap/metadata
|
||||||
|
This can be either 'internal' or 'external'.
|
||||||
|
'internal' is the default and means the metadata for the bitmap
|
||||||
|
is stored in the first 256 bytes of the allocated space and is
|
||||||
|
managed by the md module.
|
||||||
|
'external' means that bitmap metadata is managed externally to
|
||||||
|
the kernel (i.e. by some userspace program)
|
||||||
|
bitmap/can_clear
|
||||||
|
This is either 'true' or 'false'. If 'true', then bits in the
|
||||||
|
bitmap will be cleared when the corresponding blocks are thought
|
||||||
|
to be in-sync. If 'false', bits will never be cleared.
|
||||||
|
This is automatically set to 'false' if a write happens on a
|
||||||
|
degraded array, or if the array becomes degraded during a write.
|
||||||
|
When metadata is managed externally, it should be set to true
|
||||||
|
once the array becomes non-degraded, and this fact has been
|
||||||
|
recorded in the metadata.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
|
||||||
|
|
||||||
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
|
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
|
||||||
return;
|
return;
|
||||||
|
if (bitmap->mddev->bitmap_info.external)
|
||||||
|
return;
|
||||||
spin_lock_irqsave(&bitmap->lock, flags);
|
spin_lock_irqsave(&bitmap->lock, flags);
|
||||||
if (!bitmap->sb_page) { /* no superblock */
|
if (!bitmap->sb_page) { /* no superblock */
|
||||||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||||
|
@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
|
||||||
* general bitmap file operations
|
* general bitmap file operations
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* on-disk bitmap:
|
||||||
|
*
|
||||||
|
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
|
||||||
|
* file a page at a time. There's a superblock at the start of the file.
|
||||||
|
*/
|
||||||
/* calculate the index of the page that contains this bit */
|
/* calculate the index of the page that contains this bit */
|
||||||
static inline unsigned long file_page_index(unsigned long chunk)
|
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
|
||||||
{
|
{
|
||||||
return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
|
if (!bitmap->mddev->bitmap_info.external)
|
||||||
|
chunk += sizeof(bitmap_super_t) << 3;
|
||||||
|
return chunk >> PAGE_BIT_SHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* calculate the (bit) offset of this bit within a page */
|
/* calculate the (bit) offset of this bit within a page */
|
||||||
static inline unsigned long file_page_offset(unsigned long chunk)
|
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
|
||||||
{
|
{
|
||||||
return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
|
if (!bitmap->mddev->bitmap_info.external)
|
||||||
|
chunk += sizeof(bitmap_super_t) << 3;
|
||||||
|
return chunk & (PAGE_BITS - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
|
||||||
static inline struct page *filemap_get_page(struct bitmap *bitmap,
|
static inline struct page *filemap_get_page(struct bitmap *bitmap,
|
||||||
unsigned long chunk)
|
unsigned long chunk)
|
||||||
{
|
{
|
||||||
if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
|
if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
|
||||||
return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
|
return bitmap->filemap[file_page_index(bitmap, chunk)
|
||||||
|
- file_page_index(bitmap, 0)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
|
||||||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||||
|
|
||||||
while (pages--)
|
while (pages--)
|
||||||
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
|
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
|
||||||
free_buffers(map[pages]);
|
free_buffers(map[pages]);
|
||||||
kfree(map);
|
kfree(map);
|
||||||
kfree(attr);
|
kfree(attr);
|
||||||
|
@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||||
|
|
||||||
page = filemap_get_page(bitmap, chunk);
|
page = filemap_get_page(bitmap, chunk);
|
||||||
if (!page) return;
|
if (!page) return;
|
||||||
bit = file_page_offset(chunk);
|
bit = file_page_offset(bitmap, chunk);
|
||||||
|
|
||||||
/* set the bit */
|
/* set the bit */
|
||||||
kaddr = kmap_atomic(page, KM_USER0);
|
kaddr = kmap_atomic(page, KM_USER0);
|
||||||
|
@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||||
"recovery\n", bmname(bitmap));
|
"recovery\n", bmname(bitmap));
|
||||||
|
|
||||||
bytes = (chunks + 7) / 8;
|
bytes = (chunks + 7) / 8;
|
||||||
|
if (!bitmap->mddev->bitmap_info.external)
|
||||||
|
bytes += sizeof(bitmap_super_t);
|
||||||
|
|
||||||
num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
|
|
||||||
|
num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||||
|
|
||||||
if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
|
if (file && i_size_read(file->f_mapping->host) < bytes) {
|
||||||
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
|
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
|
||||||
bmname(bitmap),
|
bmname(bitmap),
|
||||||
(unsigned long) i_size_read(file->f_mapping->host),
|
(unsigned long) i_size_read(file->f_mapping->host),
|
||||||
bytes + sizeof(bitmap_super_t));
|
bytes);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||||
|
|
||||||
for (i = 0; i < chunks; i++) {
|
for (i = 0; i < chunks; i++) {
|
||||||
int b;
|
int b;
|
||||||
index = file_page_index(i);
|
index = file_page_index(bitmap, i);
|
||||||
bit = file_page_offset(i);
|
bit = file_page_offset(bitmap, i);
|
||||||
if (index != oldindex) { /* this is a new page, read it in */
|
if (index != oldindex) { /* this is a new page, read it in */
|
||||||
int count;
|
int count;
|
||||||
/* unmap the old page, we're done with it */
|
/* unmap the old page, we're done with it */
|
||||||
if (index == num_pages-1)
|
if (index == num_pages-1)
|
||||||
count = bytes + sizeof(bitmap_super_t)
|
count = bytes - index * PAGE_SIZE;
|
||||||
- index * PAGE_SIZE;
|
|
||||||
else
|
else
|
||||||
count = PAGE_SIZE;
|
count = PAGE_SIZE;
|
||||||
if (index == 0) {
|
if (index == 0 && bitmap->sb_page) {
|
||||||
/*
|
/*
|
||||||
* if we're here then the superblock page
|
* if we're here then the superblock page
|
||||||
* contains some bits (PAGE_SIZE != sizeof sb)
|
* contains some bits (PAGE_SIZE != sizeof sb)
|
||||||
|
@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
|
||||||
/* We are possibly going to clear some bits, so make
|
/* We are possibly going to clear some bits, so make
|
||||||
* sure that events_cleared is up-to-date.
|
* sure that events_cleared is up-to-date.
|
||||||
*/
|
*/
|
||||||
if (bitmap->need_sync) {
|
if (bitmap->need_sync &&
|
||||||
|
bitmap->mddev->bitmap_info.external == 0) {
|
||||||
bitmap_super_t *sb;
|
bitmap_super_t *sb;
|
||||||
bitmap->need_sync = 0;
|
bitmap->need_sync = 0;
|
||||||
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
|
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||||
|
@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
|
||||||
write_page(bitmap, bitmap->sb_page, 1);
|
write_page(bitmap, bitmap->sb_page, 1);
|
||||||
}
|
}
|
||||||
spin_lock_irqsave(&bitmap->lock, flags);
|
spin_lock_irqsave(&bitmap->lock, flags);
|
||||||
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
if (!bitmap->need_sync)
|
||||||
|
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||||
}
|
}
|
||||||
bmc = bitmap_get_counter(bitmap,
|
bmc = bitmap_get_counter(bitmap,
|
||||||
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
|
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
|
||||||
|
@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
|
||||||
if (*bmc == 2) {
|
if (*bmc == 2) {
|
||||||
*bmc=1; /* maybe clear the bit next time */
|
*bmc=1; /* maybe clear the bit next time */
|
||||||
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||||
} else if (*bmc == 1) {
|
} else if (*bmc == 1 && !bitmap->need_sync) {
|
||||||
/* we can clear the bit */
|
/* we can clear the bit */
|
||||||
*bmc = 0;
|
*bmc = 0;
|
||||||
bitmap_count_page(bitmap,
|
bitmap_count_page(bitmap,
|
||||||
|
@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
|
||||||
/* clear the bit */
|
/* clear the bit */
|
||||||
paddr = kmap_atomic(page, KM_USER0);
|
paddr = kmap_atomic(page, KM_USER0);
|
||||||
if (bitmap->flags & BITMAP_HOSTENDIAN)
|
if (bitmap->flags & BITMAP_HOSTENDIAN)
|
||||||
clear_bit(file_page_offset(j), paddr);
|
clear_bit(file_page_offset(bitmap, j),
|
||||||
|
paddr);
|
||||||
else
|
else
|
||||||
ext2_clear_bit(file_page_offset(j), paddr);
|
ext2_clear_bit(file_page_offset(bitmap, j),
|
||||||
|
paddr);
|
||||||
kunmap_atomic(paddr, KM_USER0);
|
kunmap_atomic(paddr, KM_USER0);
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
|
@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
|
||||||
bitmap->events_cleared < bitmap->mddev->events) {
|
bitmap->events_cleared < bitmap->mddev->events) {
|
||||||
bitmap->events_cleared = bitmap->mddev->events;
|
bitmap->events_cleared = bitmap->mddev->events;
|
||||||
bitmap->need_sync = 1;
|
bitmap->need_sync = 1;
|
||||||
|
sysfs_notify_dirent(bitmap->sysfs_can_clear);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!success && ! (*bmc & NEEDED_MASK))
|
if (!success && ! (*bmc & NEEDED_MASK))
|
||||||
|
@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
|
||||||
if (mddev->thread)
|
if (mddev->thread)
|
||||||
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||||
|
|
||||||
|
if (bitmap->sysfs_can_clear)
|
||||||
|
sysfs_put(bitmap->sysfs_can_clear);
|
||||||
|
|
||||||
bitmap_free(bitmap);
|
bitmap_free(bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
|
||||||
struct file *file = mddev->bitmap_info.file;
|
struct file *file = mddev->bitmap_info.file;
|
||||||
int err;
|
int err;
|
||||||
sector_t start;
|
sector_t start;
|
||||||
|
struct sysfs_dirent *bm;
|
||||||
|
|
||||||
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
|
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
|
||||||
|
|
||||||
|
@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
|
||||||
|
|
||||||
bitmap->mddev = mddev;
|
bitmap->mddev = mddev;
|
||||||
|
|
||||||
|
bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
|
||||||
|
if (bm) {
|
||||||
|
bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
|
||||||
|
sysfs_put(bm);
|
||||||
|
} else
|
||||||
|
bitmap->sysfs_can_clear = NULL;
|
||||||
|
|
||||||
bitmap->file = file;
|
bitmap->file = file;
|
||||||
if (file) {
|
if (file) {
|
||||||
get_file(file);
|
get_file(file);
|
||||||
|
@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
|
||||||
vfs_fsync(file, file->f_dentry, 1);
|
vfs_fsync(file, file->f_dentry, 1);
|
||||||
}
|
}
|
||||||
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
|
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
|
||||||
err = bitmap_read_sb(bitmap);
|
if (!mddev->bitmap_info.external)
|
||||||
|
err = bitmap_read_sb(bitmap);
|
||||||
|
else {
|
||||||
|
err = 0;
|
||||||
|
if (mddev->bitmap_info.chunksize == 0 ||
|
||||||
|
mddev->bitmap_info.daemon_sleep == 0)
|
||||||
|
/* chunksize and daemon_sleep need to be
|
||||||
|
* set first. */
|
||||||
|
err = -EINVAL;
|
||||||
|
}
|
||||||
if (err)
|
if (err)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
return rv;
|
return rv;
|
||||||
if (offset == 0)
|
if (offset == 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (mddev->major_version == 0 &&
|
if (mddev->bitmap_info.external == 0 &&
|
||||||
|
mddev->major_version == 0 &&
|
||||||
offset != mddev->bitmap_info.default_offset)
|
offset != mddev->bitmap_info.default_offset)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
mddev->bitmap_info.offset = offset;
|
mddev->bitmap_info.offset = offset;
|
||||||
|
@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
static struct md_sysfs_entry bitmap_chunksize =
|
static struct md_sysfs_entry bitmap_chunksize =
|
||||||
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
|
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
|
||||||
|
|
||||||
|
static ssize_t metadata_show(mddev_t *mddev, char *page)
|
||||||
|
{
|
||||||
|
return sprintf(page, "%s\n", (mddev->bitmap_info.external
|
||||||
|
? "external" : "internal"));
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
if (mddev->bitmap ||
|
||||||
|
mddev->bitmap_info.file ||
|
||||||
|
mddev->bitmap_info.offset)
|
||||||
|
return -EBUSY;
|
||||||
|
if (strncmp(buf, "external", 8) == 0)
|
||||||
|
mddev->bitmap_info.external = 1;
|
||||||
|
else if (strncmp(buf, "internal", 8) == 0)
|
||||||
|
mddev->bitmap_info.external = 0;
|
||||||
|
else
|
||||||
|
return -EINVAL;
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct md_sysfs_entry bitmap_metadata =
|
||||||
|
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
|
||||||
|
|
||||||
|
static ssize_t can_clear_show(mddev_t *mddev, char *page)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
if (mddev->bitmap)
|
||||||
|
len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
|
||||||
|
"false" : "true"));
|
||||||
|
else
|
||||||
|
len = sprintf(page, "\n");
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
if (mddev->bitmap == NULL)
|
||||||
|
return -ENOENT;
|
||||||
|
if (strncmp(buf, "false", 5) == 0)
|
||||||
|
mddev->bitmap->need_sync = 1;
|
||||||
|
else if (strncmp(buf, "true", 4) == 0) {
|
||||||
|
if (mddev->degraded)
|
||||||
|
return -EBUSY;
|
||||||
|
mddev->bitmap->need_sync = 0;
|
||||||
|
} else
|
||||||
|
return -EINVAL;
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct md_sysfs_entry bitmap_can_clear =
|
||||||
|
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
|
||||||
|
|
||||||
static struct attribute *md_bitmap_attrs[] = {
|
static struct attribute *md_bitmap_attrs[] = {
|
||||||
&bitmap_location.attr,
|
&bitmap_location.attr,
|
||||||
&bitmap_timeout.attr,
|
&bitmap_timeout.attr,
|
||||||
&bitmap_backlog.attr,
|
&bitmap_backlog.attr,
|
||||||
&bitmap_chunksize.attr,
|
&bitmap_chunksize.attr,
|
||||||
|
&bitmap_metadata.attr,
|
||||||
|
&bitmap_can_clear.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
struct attribute_group md_bitmap_group = {
|
struct attribute_group md_bitmap_group = {
|
||||||
|
|
|
@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
|
||||||
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
|
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
|
||||||
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
|
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
|
||||||
|
|
||||||
/*
|
|
||||||
* on-disk bitmap:
|
|
||||||
*
|
|
||||||
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
|
|
||||||
* file a page at a time. There's a superblock at the start of the file.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* map chunks (bits) to file pages - offset by the size of the superblock */
|
|
||||||
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -250,6 +240,7 @@ struct bitmap {
|
||||||
wait_queue_head_t write_wait;
|
wait_queue_head_t write_wait;
|
||||||
wait_queue_head_t overflow_wait;
|
wait_queue_head_t overflow_wait;
|
||||||
|
|
||||||
|
struct sysfs_dirent *sysfs_can_clear;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* the bitmap API */
|
/* the bitmap API */
|
||||||
|
|
|
@ -296,6 +296,7 @@ struct mddev_s
|
||||||
unsigned long chunksize;
|
unsigned long chunksize;
|
||||||
unsigned long daemon_sleep; /* how many seconds between updates? */
|
unsigned long daemon_sleep; /* how many seconds between updates? */
|
||||||
unsigned long max_write_behind; /* write-behind mode */
|
unsigned long max_write_behind; /* write-behind mode */
|
||||||
|
int external;
|
||||||
} bitmap_info;
|
} bitmap_info;
|
||||||
|
|
||||||
struct list_head all_mddevs;
|
struct list_head all_mddevs;
|
||||||
|
|
Loading…
Reference in a new issue