Merge branch 'bkl-removal' of git://git.lwn.net/linux-2.6

* 'bkl-removal' of git://git.lwn.net/linux-2.6:
  Rationalize fasync return values
  Move FASYNC bit handling to f_op->fasync()
  Use f_lock to protect f_flags
  Rename struct file->f_ep_lock
This commit is contained in:
Linus Torvalds 2009-03-26 16:14:02 -07:00
commit 8e9d208972
33 changed files with 90 additions and 113 deletions

View file

@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that
can and should be done using the internal locking with smaller critical areas).
Current worst offender is ext2_get_block()...
->fasync() is a mess. This area needs a big cleanup and that will probably
affect locking.
->fasync() is called without BKL protection, and is responsible for
maintaining the FASYNC bit in filp->f_flags. Most instances call
fasync_helper(), which does that maintenance, so it's not normally
something one needs to worry about. Return values > 0 will be mapped to
zero in the VFS layer.
->readdir() and ->ioctl() on directories must be changed. Ideally we would
move ->readdir() to inode_operations and use a separate method for directory

View file

@ -888,12 +888,7 @@ found:
static int sonypi_misc_fasync(int fd, struct file *filp, int on)
{
int retval;
retval = fasync_helper(fd, filp, on, &sonypi_device.fifo_async);
if (retval < 0)
return retval;
return 0;
return fasync_helper(fd, filp, on, &sonypi_device.fifo_async);
}
static int sonypi_misc_release(struct inode *inode, struct file *file)

View file

@ -2162,13 +2162,12 @@ static int fionbio(struct file *file, int __user *p)
if (get_user(nonblock, p))
return -EFAULT;
/* file->f_flags is still BKL protected in the fs layer - vomit */
lock_kernel();
spin_lock(&file->f_lock);
if (nonblock)
file->f_flags |= O_NONBLOCK;
else
file->f_flags &= ~O_NONBLOCK;
unlock_kernel();
spin_unlock(&file->f_lock);
return 0;
}

View file

@ -337,14 +337,10 @@ int drm_fasync(int fd, struct file *filp, int on)
{
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
int retcode;
DRM_DEBUG("fd = %d, device = 0x%lx\n", fd,
(long)old_encode_dev(priv->minor->device));
retcode = fasync_helper(fd, filp, on, &dev->buf_async);
if (retcode < 0)
return retcode;
return 0;
return fasync_helper(fd, filp, on, &dev->buf_async);
}
EXPORT_SYMBOL(drm_fasync);

View file

@ -227,12 +227,9 @@ void hiddev_report_event(struct hid_device *hid, struct hid_report *report)
*/
static int hiddev_fasync(int fd, struct file *file, int on)
{
int retval;
struct hiddev_list *list = file->private_data;
retval = fasync_helper(fd, file, on, &list->fasync);
return retval < 0 ? retval : 0;
return fasync_helper(fd, file, on, &list->fasync);
}

View file

@ -1325,11 +1325,7 @@ static int dv1394_fasync(int fd, struct file *file, int on)
struct video_card *video = file_to_video_card(file);
int retval = fasync_helper(fd, file, on, &video->fasync);
if (retval < 0)
return retval;
return 0;
return fasync_helper(fd, file, on, &video->fasync);
}
static ssize_t dv1394_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)

View file

@ -94,11 +94,8 @@ static void evdev_event(struct input_handle *handle,
static int evdev_fasync(int fd, struct file *file, int on)
{
struct evdev_client *client = file->private_data;
int retval;
retval = fasync_helper(fd, file, on, &client->fasync);
return retval < 0 ? retval : 0;
return fasync_helper(fd, file, on, &client->fasync);
}
static int evdev_flush(struct file *file, fl_owner_t id)

View file

@ -159,12 +159,9 @@ static void joydev_event(struct input_handle *handle,
static int joydev_fasync(int fd, struct file *file, int on)
{
int retval;
struct joydev_client *client = file->private_data;
retval = fasync_helper(fd, file, on, &client->fasync);
return retval < 0 ? retval : 0;
return fasync_helper(fd, file, on, &client->fasync);
}
static void joydev_free(struct device *dev)

View file

@ -403,12 +403,9 @@ static void mousedev_event(struct input_handle *handle,
static int mousedev_fasync(int fd, struct file *file, int on)
{
int retval;
struct mousedev_client *client = file->private_data;
retval = fasync_helper(fd, file, on, &client->fasync);
return retval < 0 ? retval : 0;
return fasync_helper(fd, file, on, &client->fasync);
}
static void mousedev_free(struct device *dev)

View file

@ -58,10 +58,8 @@ static unsigned int serio_raw_no;
static int serio_raw_fasync(int fd, struct file *file, int on)
{
struct serio_raw_list *list = file->private_data;
int retval;
retval = fasync_helper(fd, file, on, &list->fasync);
return retval < 0 ? retval : 0;
return fasync_helper(fd, file, on, &list->fasync);
}
static struct serio_raw *serio_raw_locate(int minor)

View file

@ -998,8 +998,8 @@ static struct fasync_struct *fasync[256] = { NULL, };
static int cosa_fasync(struct inode *inode, struct file *file, int on)
{
int port = iminor(inode);
int rv = fasync_helper(inode, file, on, &fasync[port]);
return rv < 0 ? rv : 0;
return fasync_helper(inode, file, on, &fasync[port]);
}
#endif

View file

@ -1917,12 +1917,7 @@ static struct sonypi_compat_s sonypi_compat = {
static int sonypi_misc_fasync(int fd, struct file *filp, int on)
{
int retval;
retval = fasync_helper(fd, filp, on, &sonypi_compat.fifo_async);
if (retval < 0)
return retval;
return 0;
return fasync_helper(fd, filp, on, &sonypi_compat.fifo_async);
}
static int sonypi_misc_release(struct inode *inode, struct file *file)

View file

@ -1154,7 +1154,6 @@ sg_poll(struct file *filp, poll_table * wait)
static int
sg_fasync(int fd, struct file *filp, int mode)
{
int retval;
Sg_device *sdp;
Sg_fd *sfp;
@ -1163,8 +1162,7 @@ sg_fasync(int fd, struct file *filp, int mode)
SCSI_LOG_TIMEOUT(3, printk("sg_fasync: %s, mode=%d\n",
sdp->disk->disk_name, mode));
retval = fasync_helper(fd, filp, mode, &sfp->async_qp);
return (retval < 0) ? retval : 0;
return fasync_helper(fd, filp, mode, &sfp->async_qp);
}
static int

View file

@ -1711,7 +1711,9 @@ static int do_write(struct fsg_dev *fsg)
curlun->sense_data = SS_WRITE_PROTECTED;
return -EINVAL;
}
spin_lock(&curlun->filp->f_lock);
curlun->filp->f_flags &= ~O_SYNC; // Default is not to wait
spin_unlock(&curlun->filp->f_lock);
/* Get the starting Logical Block Address and check that it's
* not too big */
@ -1728,8 +1730,11 @@ static int do_write(struct fsg_dev *fsg)
curlun->sense_data = SS_INVALID_FIELD_IN_CDB;
return -EINVAL;
}
if (fsg->cmnd[1] & 0x08) // FUA
if (fsg->cmnd[1] & 0x08) { // FUA
spin_lock(&curlun->filp->f_lock);
curlun->filp->f_flags |= O_SYNC;
spin_unlock(&curlun->filp->f_lock);
}
}
if (lba >= curlun->num_sectors) {
curlun->sense_data = SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;

View file

@ -417,10 +417,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
ep_unregister_pollwait(ep, epi);
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_ep_lock);
spin_lock(&file->f_lock);
if (ep_is_linked(&epi->fllink))
list_del_init(&epi->fllink);
spin_unlock(&file->f_ep_lock);
spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@ -538,7 +538,7 @@ void eventpoll_release_file(struct file *file)
struct epitem *epi;
/*
* We don't want to get "file->f_ep_lock" because it is not
* We don't want to get "file->f_lock" because it is not
* necessary. It is not necessary because we're in the "struct file"
* cleanup path, and this means that noone is using this file anymore.
* So, for example, epoll_ctl() cannot hit here sicne if we reach this
@ -547,6 +547,8 @@ void eventpoll_release_file(struct file *file)
* will correctly serialize the operation. We do need to acquire
* "ep->mtx" after "epmutex" because ep_remove() requires it when called
* from anywhere but ep_free().
*
* Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
@ -785,9 +787,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
goto error_unregister;
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_ep_lock);
spin_lock(&tfile->f_lock);
list_add_tail(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_ep_lock);
spin_unlock(&tfile->f_lock);
/*
* Add the current item to the RB tree. All RB tree operations are

View file

@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
return ret;
}
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
static int setfl(int fd, struct file * filp, unsigned long arg)
{
@ -177,21 +177,21 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
return error;
/*
* We still need a lock here for now to keep multiple FASYNC calls
* from racing with each other.
* ->fasync() is responsible for setting the FASYNC bit.
*/
lock_kernel();
if ((arg ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync) {
if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
filp->f_op->fasync) {
error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
if (error < 0)
goto out;
if (error > 0)
error = 0;
}
}
spin_lock(&filp->f_lock);
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
spin_unlock(&filp->f_lock);
out:
unlock_kernel();
return error;
}
@ -516,7 +516,7 @@ static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;
/*
* fasync_helper() is used by some character device drivers (mainly mice)
* fasync_helper() is used by almost all character device drivers
* to set up the fasync queue. It returns negative on error, 0 if it did
* no changes and positive if it added/deleted the entry.
*/
@ -555,6 +555,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
result = 1;
}
out:
/* Fix up FASYNC bit while still holding fasync_lock */
spin_lock(&filp->f_lock);
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
spin_unlock(&filp->f_lock);
write_unlock_irq(&fasync_lock);
return result;
}

View file

@ -128,6 +128,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
f->f_cred = get_cred(cred);
spin_lock_init(&f->f_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;

View file

@ -404,10 +404,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
if (O_NONBLOCK != O_NDELAY)
flag |= O_NDELAY;
#endif
spin_lock(&filp->f_lock);
if (on)
filp->f_flags |= flag;
else
filp->f_flags &= ~flag;
spin_unlock(&filp->f_lock);
return error;
}
@ -425,18 +427,12 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
/* Did FASYNC state change ? */
if ((flag ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync)
/* fasync() adjusts filp->f_flags */
error = filp->f_op->fasync(fd, filp, on);
else
error = -ENOTTY;
}
if (error)
return error;
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
return error;
return error < 0 ? error : 0;
}
static int ioctl_fsfreeze(struct file *filp)
@ -499,17 +495,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
break;
case FIONBIO:
/* BKL needed to avoid races tweaking f_flags */
lock_kernel();
error = ioctl_fionbio(filp, argp);
unlock_kernel();
break;
case FIOASYNC:
/* BKL needed to avoid races tweaking f_flags */
lock_kernel();
error = ioctl_fioasync(fd, filp, argp);
unlock_kernel();
break;
case FIOQSIZE:

View file

@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (!EX_ISSYNC(exp))
stable = 0;
if (stable && !EX_WGATHER(exp))
if (stable && !EX_WGATHER(exp)) {
spin_lock(&file->f_lock);
file->f_flags |= O_SYNC;
spin_unlock(&file->f_lock);
}
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);

View file

@ -667,10 +667,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
mutex_unlock(&inode->i_mutex);
if (retval < 0)
return retval;
return 0;
}
@ -684,10 +681,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
mutex_unlock(&inode->i_mutex);
if (retval < 0)
return retval;
return 0;
}
@ -706,11 +700,7 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
mutex_unlock(&inode->i_mutex);
if (retval < 0)
return retval;
return 0;
}

View file

@ -61,7 +61,6 @@ struct file;
static inline void eventpoll_init_file(struct file *file)
{
INIT_LIST_HEAD(&file->f_ep_links);
spin_lock_init(&file->f_ep_lock);
}

View file

@ -849,6 +849,7 @@ struct file {
#define f_dentry f_path.dentry
#define f_vfsmnt f_path.mnt
const struct file_operations *f_op;
spinlock_t f_lock; /* f_ep_links, f_flags */
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@ -867,7 +868,6 @@ struct file {
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT

View file

@ -1156,10 +1156,12 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
if (u_mqstat) {
audit_mq_getsetattr(mqdes, &mqstat);
spin_lock(&filp->f_lock);
if (mqstat.mq_flags & O_NONBLOCK)
filp->f_flags |= O_NONBLOCK;
else
filp->f_flags &= ~O_NONBLOCK;
spin_unlock(&filp->f_lock);
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}

View file

@ -1074,6 +1074,13 @@ static int sock_fasync(int fd, struct file *filp, int on)
lock_sock(sk);
spin_lock(&filp->f_lock);
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
spin_unlock(&filp->f_lock);
prev = &(sock->fasync_list);
for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)

View file

@ -1373,12 +1373,9 @@ EXPORT_SYMBOL(snd_ctl_unregister_ioctl_compat);
static int snd_ctl_fasync(int fd, struct file * file, int on)
{
struct snd_ctl_file *ctl;
int err;
ctl = file->private_data;
err = fasync_helper(fd, file, on, &ctl->fasync);
if (err < 0)
return err;
return 0;
return fasync_helper(fd, file, on, &ctl->fasync);
}
/*

View file

@ -1903,7 +1903,9 @@ static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsig
static int snd_pcm_oss_nonblock(struct file * file)
{
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
}

View file

@ -3246,9 +3246,7 @@ static int snd_pcm_fasync(int fd, struct file * file, int on)
err = fasync_helper(fd, file, on, &runtime->fasync);
out:
unlock_kernel();
if (err < 0)
return err;
return 0;
}
/*

View file

@ -1825,13 +1825,9 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd,
static int snd_timer_user_fasync(int fd, struct file * file, int on)
{
struct snd_timer_user *tu;
int err;
tu = file->private_data;
err = fasync_helper(fd, file, on, &tu->fasync);
if (err < 0)
return err;
return 0;
return fasync_helper(fd, file, on, &tu->fasync);
}
static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,

View file

@ -1627,7 +1627,9 @@ au1550_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
sizeof(abinfo)) ? -EFAULT : 0;
case SNDCTL_DSP_NONBLOCK:
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
case SNDCTL_DSP_GETODELAY:

View file

@ -433,7 +433,9 @@ int audio_ioctl(int dev, struct file *file, unsigned int cmd, void __user *arg)
return dma_ioctl(dev, cmd, arg);
case SNDCTL_DSP_NONBLOCK:
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
case SNDCTL_DSP_GETCAPS:

View file

@ -135,7 +135,9 @@ static int dac_audio_ioctl(struct inode *inode, struct file *file,
return put_user(AFMT_U8, (int *)arg);
case SNDCTL_DSP_NONBLOCK:
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
case SNDCTL_DSP_GETCAPS:

View file

@ -2200,7 +2200,9 @@ static int cs4297a_ioctl(struct inode *inode, struct file *file,
sizeof(abinfo)) ? -EFAULT : 0;
case SNDCTL_DSP_NONBLOCK:
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
case SNDCTL_DSP_GETODELAY:

View file

@ -2673,7 +2673,9 @@ static int vwsnd_audio_do_ioctl(struct inode *inode,
case SNDCTL_DSP_NONBLOCK: /* _SIO ('P',14) */
DBGX("SNDCTL_DSP_NONBLOCK\n");
spin_lock(&file->f_lock);
file->f_flags |= O_NONBLOCK;
spin_unlock(&file->f_lock);
return 0;
case SNDCTL_DSP_RESET: /* _SIO ('P', 0) */