Quota fixes in 2.3.26

Jan Kara (jack@atrey.karlin.mff.cuni.cz)
Fri, 12 Nov 1999 10:29:00 +0100


--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii

Hello.

I'm sending you quota fixes for 2.3.26. They fix the same problems that
were in 2.2 - deadlocks in write_dquot(), races in dquot_transfer(), and
bugs in quota initialization and finalization. I moved remove_dquot_ref()
to inode.c as it now searches through inodes. I've also changed the design
of this function - we gather all dquots for freeing and then free them
together - so the code is a lot simpler because we don't have to restart
each time dqput() blocks. I also fixed division problems that arose when we
changed i_size from size_t to loff_t on i386, and bugs caused by the removal
of select_dcache() (or whatever it was called) from the dcache. The patch
against 2.3.26 is attached.

Honza.

--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="quota-fix-2.3.26-1.diff"

--- linux/include/linux/fs.h Sat Nov 6 19:20:00 1999
+++ linux/include/linux/fs.h Sat Nov 6 19:25:31 1999
@@ -676,7 +676,7 @@
int (*alloc_inode) (const struct inode *, unsigned long, uid_t);
void (*free_block) (const struct inode *, unsigned long);
void (*free_inode) (const struct inode *, unsigned long);
- int (*transfer) (struct inode *, struct iattr *, char, uid_t);
+ int (*transfer) (struct dentry *, struct iattr *, uid_t);
};

struct file_system_type {
--- linux/include/linux/mount.h Sat Oct 23 23:34:58 1999
+++ linux/include/linux/mount.h Sun Oct 24 01:07:10 1999
@@ -17,11 +17,12 @@
struct quota_mount_options
{
unsigned int flags; /* Flags for diskquotas on this device */
- struct semaphore semaphore; /* lock device while I/O in progress */
+ struct semaphore dqio_sem; /* lock device while I/O in progress */
+ struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */
struct file *files[MAXQUOTAS]; /* fp's to quotafiles */
time_t inode_expire[MAXQUOTAS]; /* expiretime for inode-quota */
time_t block_expire[MAXQUOTAS]; /* expiretime for block-quota */
- char rsquash[MAXQUOTAS]; /* for quotas treat root as any other user */
+ char rsquash[MAXQUOTAS]; /* for quotas threat root as any other user */
};

struct vfsmount
--- linux/include/linux/quota.h Thu May 13 23:29:31 1999
+++ linux/include/linux/quota.h Sun Oct 10 12:57:31 1999
@@ -202,7 +202,7 @@
# /* nodep */ include <sys/cdefs.h>

__BEGIN_DECLS
-int quotactl __P ((int, const char *, int, caddr_t));
+long quotactl __P ((int, const char *, int, caddr_t));
__END_DECLS

#endif /* __KERNEL__ */
--- linux/include/linux/quotaops.h Tue Sep 7 20:51:33 1999
+++ linux/include/linux/quotaops.h Sun Oct 10 12:40:01 1999
@@ -31,8 +31,8 @@
extern void dquot_free_block(const struct inode *inode, unsigned long number);
extern void dquot_free_inode(const struct inode *inode, unsigned long number);

-extern int dquot_transfer(struct inode *inode, struct iattr *iattr,
- char direction, uid_t initiator);
+extern int dquot_transfer(struct dentry *dentry, struct iattr *iattr,
+ uid_t initiator);

/*
* Operations supported for diskquotas.
@@ -99,17 +99,11 @@
int error = -EDQUOT;

if (dentry->d_inode->i_sb->dq_op) {
- if (IS_QUOTAINIT(dentry->d_inode) == 0)
- dentry->d_inode->i_sb->dq_op->initialize(dentry->d_inode, -1);
- if (dentry->d_inode->i_sb->dq_op->transfer(dentry->d_inode, iattr, 0, current->fsuid))
- goto out;
- error = notify_change(dentry, iattr);
- if (error)
- dentry->d_inode->i_sb->dq_op->transfer(dentry->d_inode, iattr, 1, current->fsuid);
+ dentry->d_inode->i_sb->dq_op->initialize(dentry->d_inode, -1);
+ error = dentry->d_inode->i_sb->dq_op->transfer(dentry, iattr, current->fsuid);
} else {
error = notify_change(dentry, iattr);
}
-out:
return error;
}

--- linux/fs/super.c Sat Oct 23 23:36:29 1999
+++ linux/fs/super.c Sun Oct 24 01:06:12 1999
@@ -104,7 +104,8 @@
lptr->mnt_dev = sb->s_dev;
lptr->mnt_flags = sb->s_flags;

- sema_init(&lptr->mnt_dquot.semaphore, 1);
+ sema_init(&lptr->mnt_dquot.dqio_sem, 1);
+ sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
lptr->mnt_dquot.flags = 0;

/* N.B. Is it really OK to have a vfsmount without names? */
--- linux/fs/inode.c Sat Nov 6 19:19:57 1999
+++ linux/fs/inode.c Sat Nov 6 19:26:25 1999
@@ -756,3 +756,47 @@
inode->i_atime = CURRENT_TIME;
mark_inode_dirty (inode);
} /* End Function update_atime */
+
+/*
+ * Quota functions
+ */
+
+/* Functions back in dquot.c */
+void put_dquot_list(struct list_head *);
+int remove_inode_dquot_ref(struct inode *, short, struct list_head *);
+
+void remove_dquot_ref(kdev_t dev, short type)
+{
+ struct super_block *sb = get_super(dev);
+ struct inode *inode;
+ struct list_head *act_head;
+ LIST_HEAD(tofree_head);
+
+ if (!sb || !sb->dq_op)
+ return; /* nothing to do */
+
+ /* We have to be protected against other CPUs */
+ spin_lock(&inode_lock);
+
+ for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
+ continue;
+ remove_inode_dquot_ref(inode, type, &tofree_head);
+ }
+ for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
+ continue;
+ remove_inode_dquot_ref(inode, type, &tofree_head);
+ }
+ for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (!IS_QUOTAINIT(inode))
+ continue;
+ remove_inode_dquot_ref(inode, type, &tofree_head);
+ }
+ spin_unlock(&inode_lock);
+
+ put_dquot_list(&tofree_head);
+}
--- linux/fs/dquot.c Sat Nov 6 20:00:25 1999
+++ linux/fs/dquot.c Thu Nov 11 22:13:50 1999
@@ -21,6 +21,19 @@
* Revised list management to avoid races
* -- Bill Hawes, <whawes@star.net>, 9/98
*
+ * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...().
+ * As the consequence the locking was moved from dquot_decr_...(),
+ * dquot_incr_...() to calling functions.
+ * invalidate_dquots() now writes modified dquots.
+ * Serialized quota_off() and quota_on() for mount point.
+ * Fixed a few bugs in grow_dquots.
+ * Fixed deadlock in write_dquot() - we no longer account quotas on
+ * quota files
+ * remove_dquot_ref() moved to inode.c - it now traverses through inodes
+ * add_dquot_ref() restarts after blocking
+ * Added check for bogus uid and fixed check for group in quotactl.
+ * Jan Kara, <jack@suse.cz>, sponsored by SuSE CR, 10-11/99
+ *
* (C) Copyright 1994 - 1997 Marco van Wieringen
*/

@@ -74,7 +87,7 @@
*/

static struct dquot *inuse_list = NULL;
-LIST_HEAD(free_dquots);
+static LIST_HEAD(free_dquots);
static struct dquot *dquot_hash[NR_DQHASH];
static int dquot_updating[NR_DQHASH];

@@ -82,6 +95,9 @@
static DECLARE_WAIT_QUEUE_HEAD(dquot_wait);
static DECLARE_WAIT_QUEUE_HEAD(update_wait);

+static void dqput(struct dquot *);
+static struct dquot *dqduplicate(struct dquot *);
+
static inline char is_enabled(struct vfsmount *vfsmnt, short type)
{
switch (type) {
@@ -166,7 +182,8 @@
{
/* sanity check */
if (list_empty(&dquot->dq_free)) {
- printk("remove_free_dquot: dquot not on free list??\n");
+ printk("remove_free_dquot: dquot not on the free list??\n");
+ return; /* J.K. Just don't do anything */
}
list_del(&dquot->dq_free);
INIT_LIST_HEAD(&dquot->dq_free);
@@ -226,16 +243,25 @@
wake_up(&dquot->dq_wait);
}

+/*
+ * We don't have to be afraid of deadlocks as we never have quotas on quota files...
+ */
static void write_dquot(struct dquot *dquot)
{
short type = dquot->dq_type;
- struct file *filp = dquot->dq_mnt->mnt_dquot.files[type];
+ struct file *filp;
mm_segment_t fs;
loff_t offset;
ssize_t ret;
+ struct semaphore *sem = &dquot->dq_mnt->mnt_dquot.dqio_sem;

lock_dquot(dquot);
- down(&dquot->dq_mnt->mnt_dquot.semaphore);
+ if (!dquot->dq_mnt) { /* Invalidated quota? */
+ unlock_dquot(dquot);
+ return;
+ }
+ down(sem);
+ filp = dquot->dq_mnt->mnt_dquot.files[type];
offset = dqoff(dquot->dq_id);
fs = get_fs();
set_fs(KERNEL_DS);
@@ -253,40 +279,42 @@
printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
kdevname(dquot->dq_dev));

- up(&dquot->dq_mnt->mnt_dquot.semaphore);
set_fs(fs);
-
+ up(sem);
unlock_dquot(dquot);
+
dqstats.writes++;
}

static void read_dquot(struct dquot *dquot)
{
- short type;
+ short type = dquot->dq_type;
struct file *filp;
mm_segment_t fs;
loff_t offset;

- type = dquot->dq_type;
filp = dquot->dq_mnt->mnt_dquot.files[type];
-
if (filp == (struct file *)NULL)
return;

lock_dquot(dquot);
- down(&dquot->dq_mnt->mnt_dquot.semaphore);
+ if (!dquot->dq_mnt) /* Invalidated quota? */
+ goto out_lock;
+ /* Now we are sure filp is valid - the dquot isn't invalidated */
+ down(&dquot->dq_mnt->mnt_dquot.dqio_sem);
offset = dqoff(dquot->dq_id);
fs = get_fs();
set_fs(KERNEL_DS);
filp->f_op->read(filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk), &offset);
- up(&dquot->dq_mnt->mnt_dquot.semaphore);
+ up(&dquot->dq_mnt->mnt_dquot.dqio_sem);
set_fs(fs);

if (dquot->dq_bhardlimit == 0 && dquot->dq_bsoftlimit == 0 &&
dquot->dq_ihardlimit == 0 && dquot->dq_isoftlimit == 0)
dquot->dq_flags |= DQ_FAKE;
- unlock_dquot(dquot);
dqstats.reads++;
+out_lock:
+ unlock_dquot(dquot);
}

/*
@@ -306,10 +334,11 @@

void invalidate_dquots(kdev_t dev, short type)
{
- struct dquot *dquot, *next = inuse_list;
+ struct dquot *dquot, *next;
int need_restart;

restart:
+ next = inuse_list; /* Here it is better. Otherwise the restart doesn't have any sense ;-) */
need_restart = 0;
while ((dquot = next) != NULL) {
next = dquot->dq_next;
@@ -317,6 +346,8 @@
continue;
if (dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt) /* Already invalidated entry? */
+ continue;
if (dquot->dq_flags & DQ_LOCKED) {
__wait_on_dquot(dquot);

@@ -329,6 +360,18 @@
continue;
if (dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt)
+ continue;
+ }
+ /*
+ * Because inodes needn't to be the only holders of dquot
+ * the quota needn't to be written to disk. So we write it
+ * ourselves before discarding the data just for sure...
+ */
+ if (dquot->dq_flags & DQ_MOD && dquot->dq_mnt)
+ {
+ write_dquot(dquot);
+ need_restart = 1; /* We slept on IO */
}
clear_dquot(dquot);
}
@@ -342,10 +385,11 @@

int sync_dquots(kdev_t dev, short type)
{
- struct dquot *dquot, *next = inuse_list;
+ struct dquot *dquot, *next, *ddquot;
int need_restart;

restart:
+ next = inuse_list;
need_restart = 0;
while ((dquot = next) != NULL) {
next = dquot->dq_next;
@@ -353,12 +397,16 @@
continue;
if (type != -1 && dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt) /* Invalidated? */
+ continue;
if (!(dquot->dq_flags & (DQ_LOCKED | DQ_MOD)))
continue;

- wait_on_dquot(dquot);
- if (dquot->dq_flags & DQ_MOD)
- write_dquot(dquot);
+ if ((ddquot = dqduplicate(dquot)) == NODQUOT)
+ continue;
+ if (ddquot->dq_flags & DQ_MOD)
+ write_dquot(ddquot);
+ dqput(ddquot);
/* Set the flag for another pass. */
need_restart = 1;
}
@@ -373,7 +421,8 @@
return(0);
}

-void dqput(struct dquot *dquot)
+/* NOTE: If you change this function please check whether dqput_blocks() works right... */
+static void dqput(struct dquot *dquot)
{
if (!dquot)
return;
@@ -390,42 +439,45 @@
* checking and doesn't need to be written. It's just an empty
* dquot that is put back on to the freelist.
*/
- if (dquot->dq_mnt != (struct vfsmount *)NULL) {
+ if (dquot->dq_mnt)
dqstats.drops++;
we_slept:
- wait_on_dquot(dquot);
- if (dquot->dq_count > 1) {
- dquot->dq_count--;
- return;
- }
- if (dquot->dq_flags & DQ_MOD) {
- write_dquot(dquot);
- goto we_slept;
- }
+ if (dquot->dq_count > 1) {
+ /* We have more than one user... We can simply decrement use count */
+ dquot->dq_count--;
+ return;
+ }
+ if (dquot->dq_flags & DQ_LOCKED) {
+ printk(KERN_ERR "VFS: Locked quota to be put on the free list.\n");
+ dquot->dq_flags &= ~DQ_LOCKED;
+ }
+ if (dquot->dq_mnt && dquot->dq_flags & DQ_MOD) {
+ write_dquot(dquot);
+ goto we_slept;
}

/* sanity check */
if (!list_empty(&dquot->dq_free)) {
- printk("dqput: dquot already on free list??\n");
- }
- if (--dquot->dq_count == 0) {
- /* Place at end of LRU free queue */
- put_dquot_last(dquot);
- wake_up(&dquot_wait);
+ printk(KERN_ERR "dqput: dquot already on free list??\n");
+ dquot->dq_count--; /* J.K. Just decrementing use count seems safer... */
+ return;
}
-
- return;
+ dquot->dq_count--;
+ dquot->dq_flags &= ~DQ_MOD; /* Modified flag has no sense on free list */
+ /* Place at end of LRU free queue */
+ put_dquot_last(dquot);
+ wake_up(&dquot_wait);
}

-static void grow_dquots(void)
+static int grow_dquots(void)
{
struct dquot *dquot;
- int cnt = 32;
+ int cnt = 0;

- while (cnt > 0) {
+ while (cnt < 32) {
dquot = kmem_cache_alloc(dquot_cachep, SLAB_KERNEL);
if(!dquot)
- return;
+ return cnt;

nr_dquots++;
memset((caddr_t)dquot, 0, sizeof(struct dquot));
@@ -433,8 +485,9 @@
/* all dquots go on the inuse_list */
put_inuse(dquot);
put_dquot_head(dquot);
- cnt--;
+ cnt++;
}
+ return cnt;
}

static struct dquot *find_best_candidate_weighted(void)
@@ -446,6 +499,7 @@

while ((tmp = tmp->next) != &free_dquots && --limit) {
dquot = list_entry(tmp, struct dquot, dq_free);
+ /* This should never happen... */
if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD))
continue;
myscore = dquot->dq_referenced;
@@ -474,27 +528,16 @@
struct dquot *get_empty_dquot(void)
{
struct dquot *dquot;
- int count;
+ int shrink = 1; /* Number of times we should try to shrink dcache and icache */

repeat:
dquot = find_best_free();
if (!dquot)
goto pressure;
got_it:
- if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD)) {
- wait_on_dquot(dquot);
- if (dquot->dq_flags & DQ_MOD)
- {
- if(dquot->dq_mnt != (struct vfsmount *)NULL)
- write_dquot(dquot);
- }
- /*
- * The dquot may be back in use now, so we
- * must recheck the free list.
- */
- goto repeat;
- }
- /* sanity check ... */
+ /* Sanity checks */
+ if (dquot->dq_flags & DQ_LOCKED)
+ printk(KERN_ERR "VFS: Locked dquot on the free list\n");
if (dquot->dq_count != 0)
printk(KERN_ERR "VFS: free dquot count=%d\n", dquot->dq_count);

@@ -505,10 +548,9 @@
return dquot;

pressure:
- if (nr_dquots < max_dquots) {
- grow_dquots();
- goto repeat;
- }
+ if (nr_dquots < max_dquots)
+ if (grow_dquots())
+ goto repeat;

dquot = find_best_candidate_weighted();
if (dquot)
@@ -516,10 +558,11 @@
/*
* Try pruning the dcache to free up some dquots ...
*/
- if (count) {
- printk(KERN_DEBUG "get_empty_dquot: pruning %d\n", count);
- prune_dcache(count);
- prune_icache(count);
+ if (shrink) {
+ printk(KERN_DEBUG "get_empty_dquot: pruning dcache and icache\n");
+ prune_dcache(128);
+ prune_icache(128);
+ shrink--;
goto repeat;
}

@@ -534,7 +577,7 @@
struct dquot *dquot, *empty = NULL;
struct vfsmount *vfsmnt;

- if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL || is_enabled(vfsmnt, type) == 0)
+ if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL || !is_enabled(vfsmnt, type))
return(NODQUOT);

we_slept:
@@ -567,12 +610,67 @@
while (dquot_updating[hashent])
sleep_on(&update_wait);

+ if (!dquot->dq_mnt) { /* Has somebody invalidated entry under us? */
+ /*
+ * Do it as if the quota was invalidated before we started
+ */
+ dqput(dquot);
+ return NODQUOT;
+ }
dquot->dq_referenced++;
dqstats.lookups++;

return dquot;
}

+static struct dquot *dqduplicate(struct dquot *dquot)
+{
+ if (dquot == NODQUOT || !dquot->dq_mnt)
+ return NODQUOT;
+ dquot->dq_count++;
+ wait_on_dquot(dquot);
+ if (!dquot->dq_mnt) {
+ dquot->dq_count--;
+ return NODQUOT;
+ }
+ dquot->dq_referenced++;
+ dqstats.lookups++;
+ return dquot;
+}
+
+/* Check whether this inode is quota file */
+static inline int is_quotafile(struct inode *inode)
+{
+ int cnt;
+ struct vfsmount *vfsmnt;
+ struct file **files;
+
+ vfsmnt = lookup_vfsmnt(inode->i_dev);
+ if (!vfsmnt)
+ return 0;
+ files = vfsmnt->mnt_dquot.files;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (files[cnt] && files[cnt]->f_dentry->d_inode == inode)
+ return 1;
+ return 0;
+}
+
+static int dqinit_needed(struct inode *inode, short type)
+{
+ int cnt;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
+ return 0;
+ if (is_quotafile(inode))
+ return 0;
+ if (type != -1)
+ return inode->i_dquot[type] == NODQUOT;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt] == NODQUOT)
+ return 1;
+ return 0;
+}
+
static void add_dquot_ref(kdev_t dev, short type)
{
struct super_block *sb = get_super(dev);
@@ -582,6 +680,7 @@
if (!sb || !sb->dq_op)
return; /* nothing to do */

+restart:
file_list_lock();
for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
struct file *filp = list_entry(p, struct file, f_list);
@@ -590,84 +689,81 @@
inode = filp->f_dentry->d_inode;
if (!inode)
continue;
- /* N.B. race problem -- filp could become unused */
- if (filp->f_mode & FMODE_WRITE) {
+ if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
file_list_unlock();
sb->dq_op->initialize(inode, type);
inode->i_flags |= S_QUOTA;
- file_list_lock();
+ /* As we may have blocked we had better restart... */
+ goto restart;
}
}
file_list_unlock();
}

-static void reset_dquot_ptrs(kdev_t dev, short type)
+/* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */
+static inline int dqput_blocks(struct dquot *dquot)
{
- struct super_block *sb = get_super(dev);
- struct list_head *p;
- struct inode *inode;
- struct dquot *dquot;
+ if (dquot->dq_count == 1)
+ return 1;
+ return 0;
+}
+
+/* Remove references to dquots from inode - add dquot to list for freeing if needed */
+int remove_inode_dquot_ref(struct inode *inode, short type, struct list_head *tofree_head)
+{
+ struct dquot *dquot = inode->i_dquot[type];
int cnt;

- if (!sb || !sb->dq_op)
- return; /* nothing to do */
+ inode->i_dquot[type] = NODQUOT;
+ /* any other quota in use? */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT)
+ goto put_it;
+ }
+ inode->i_flags &= ~S_QUOTA;
+put_it:
+ if (dquot != NODQUOT)
+ if (dqput_blocks(dquot)) {
+ if (dquot->dq_count != 1)
+ printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", dquot->dq_count);
+ list_add(&dquot->dq_free, tofree_head); /* As dquot must have currently users it can't be on the free list... */
+ return 1;
+ }
+ else
+ dqput(dquot); /* We have guaranteed we won't block */
+ return 0;
+}

-restart:
- /* free any quota for unused dentries */
- shrink_dcache_sb(sb);
+/* Free list of dquots - called from inode.c */
+void put_dquot_list(struct list_head *tofree_head)
+{
+ struct list_head *act_head = tofree_head;
+ struct dquot *dquot;

- file_list_lock();
- for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
- struct file *filp = list_entry(p, struct file, f_list);
- if (!filp->f_dentry)
- continue;
- inode = filp->f_dentry->d_inode;
- if (!inode)
- continue;
- /*
- * Note: we restart after each blocking operation,
- * as the inuse_filps list may have changed.
- */
- if (IS_QUOTAINIT(inode)) {
- dquot = inode->i_dquot[type];
- inode->i_dquot[type] = NODQUOT;
- /* any other quota in use? */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT)
- goto put_it;
- }
- inode->i_flags &= ~S_QUOTA;
- put_it:
- if (dquot != NODQUOT) {
- file_list_unlock();
- dqput(dquot);
- /* we may have blocked ... */
- goto restart;
- }
- }
+ /* So now we have dquots on the list... Just free them */
+ while (act_head != tofree_head) {
+ dquot = list_entry(act_head, struct dquot, dq_free);
+ act_head = act_head->next;
+ list_del(&dquot->dq_free); /* Remove dquot from the list so we won't have problems... */
+ INIT_LIST_HEAD(&dquot->dq_free);
+ dqput(dquot);
}
- file_list_unlock();
}

static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
dquot->dq_curinodes += number;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_incr_blocks(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
dquot->dq_curblocks += number;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
if (dquot->dq_curinodes > number)
dquot->dq_curinodes -= number;
else
@@ -676,12 +772,10 @@
dquot->dq_itime = (time_t) 0;
dquot->dq_flags &= ~DQ_INODES;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_decr_blocks(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
if (dquot->dq_curblocks > number)
dquot->dq_curblocks -= number;
else
@@ -690,7 +784,6 @@
dquot->dq_btime = (time_t) 0;
dquot->dq_flags &= ~DQ_BLKS;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline char need_print_warning(short type, uid_t initiator, struct dquot *dquot)
@@ -709,7 +802,7 @@
return(initiator == 0 && dquot->dq_mnt->mnt_dquot.rsquash[dquot->dq_type] == 0);
}

-static int check_idq(struct dquot *dquot, short type, u_long short inodes, uid_t initiator,
+static int check_idq(struct dquot *dquot, short type, u_long inodes, uid_t initiator,
struct tty_struct *tty)
{
if (inodes <= 0 || dquot->dq_flags & DQ_FAKE)
@@ -874,9 +967,11 @@
if (dquot == NODQUOT)
goto out;

+ lock_dquot(dquot); /* We must protect against invalidating the quota */
error = -EFAULT;
if (dqblk && !copy_to_user(dqblk, &dquot->dq_dqb, sizeof(struct dqblk)))
error = 0;
+ unlock_dquot(dquot);
dqput(dquot);
out:
return error;
@@ -917,14 +1012,14 @@
* This is a simple algorithm that calculates the size of a file in blocks.
* This is only used on filesystems that do not have an i_blocks count.
*/
-static u_long isize_to_blocks(size_t isize, size_t blksize)
+static u_long isize_to_blocks(loff_t isize, size_t blksize_bits)
{
u_long blocks;
u_long indirect;

- if (!blksize)
- blksize = BLOCK_SIZE;
- blocks = (isize / blksize) + ((isize % blksize) ? 1 : 0);
+ if (!blksize_bits)
+ blksize_bits = BLOCK_SIZE_BITS;
+ blocks = (isize >> blksize_bits) + ((isize & ~((1 << blksize_bits)-1)) ? 1 : 0);
if (blocks > 10) {
indirect = ((blocks - 11) >> 8) + 1; /* single indirect blocks */
if (blocks > (10 + 256)) {
@@ -934,7 +1029,7 @@
}
blocks += indirect;
}
- return(blocks);
+ return blocks;
}

/*
@@ -951,6 +1046,9 @@
if (S_ISREG(inode->i_mode) ||
S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) {
+ /* We don't want to have quotas on quota files - nasty deadlocks possible */
+ if (is_quotafile(inode))
+ return;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
@@ -968,6 +1066,8 @@
break;
}
dquot = dqget(inode->i_dev, id, cnt);
+ if (dquot == NODQUOT)
+ continue;
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(dquot);
continue;
@@ -1005,23 +1105,36 @@
int dquot_alloc_block(const struct inode *inode, unsigned long number, uid_t initiator,
char warn)
{
- unsigned short cnt;
+ int cnt;
struct tty_struct *tty = current->tty;
+ struct dquot *dquot[MAXQUOTAS];

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot[cnt] = dqduplicate(inode->i_dquot[cnt]);
+ if (dquot[cnt] == NODQUOT)
continue;
- if (check_bdq(inode->i_dquot[cnt], cnt, number, initiator, tty, warn))
- return(NO_QUOTA);
+ lock_dquot(dquot[cnt]);
+ if (check_bdq(dquot[cnt], cnt, number, initiator, tty, warn))
+ goto put_all;
}

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ if (dquot[cnt] == NODQUOT)
continue;
- dquot_incr_blocks(inode->i_dquot[cnt], number);
+ dquot_incr_blocks(dquot[cnt], number);
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
}

- return(QUOTA_OK);
+ return QUOTA_OK;
+put_all:
+ for (; cnt >= 0; cnt--) {
+ if (dquot[cnt] == NODQUOT)
+ continue;
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
+ }
+ return NO_QUOTA;
}

/*
@@ -1029,23 +1142,36 @@
*/
int dquot_alloc_inode(const struct inode *inode, unsigned long number, uid_t initiator)
{
- unsigned short cnt;
+ int cnt;
struct tty_struct *tty = current->tty;
+ struct dquot *dquot[MAXQUOTAS];

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot[cnt] = dqduplicate(inode -> i_dquot[cnt]);
+ if (dquot[cnt] == NODQUOT)
continue;
- if (check_idq(inode->i_dquot[cnt], cnt, number, initiator, tty))
- return(NO_QUOTA);
+ lock_dquot(dquot[cnt]);
+ if (check_idq(dquot[cnt], cnt, number, initiator, tty))
+ goto put_all;
}

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ if (dquot[cnt] == NODQUOT)
continue;
- dquot_incr_inodes(inode->i_dquot[cnt], number);
+ dquot_incr_inodes(dquot[cnt], number);
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
}

- return(QUOTA_OK);
+ return QUOTA_OK;
+put_all:
+ for (; cnt >= 0; cnt--) {
+ if (dquot[cnt] == NODQUOT)
+ continue;
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
+ }
+ return NO_QUOTA;
}

/*
@@ -1054,11 +1180,14 @@
void dquot_free_block(const struct inode *inode, unsigned long number)
{
unsigned short cnt;
+ struct dquot *dquot;

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot = inode->i_dquot[cnt];
+ if (dquot == NODQUOT)
continue;
- dquot_decr_blocks(inode->i_dquot[cnt], number);
+ wait_on_dquot(dquot);
+ dquot_decr_blocks(dquot, number);
}
}

@@ -1068,11 +1197,14 @@
void dquot_free_inode(const struct inode *inode, unsigned long number)
{
unsigned short cnt;
+ struct dquot *dquot;

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot = inode->i_dquot[cnt];
+ if (dquot == NODQUOT)
continue;
- dquot_decr_inodes(inode->i_dquot[cnt], number);
+ wait_on_dquot(dquot);
+ dquot_decr_inodes(dquot, number);
}
}

@@ -1081,21 +1213,25 @@
*
* Note: this is a blocking operation.
*/
-int dquot_transfer(struct inode *inode, struct iattr *iattr, char direction, uid_t initiator)
+int dquot_transfer(struct dentry *dentry, struct iattr *iattr, uid_t initiator)
{
+ struct inode *inode = dentry -> d_inode;
unsigned long blocks;
struct dquot *transfer_from[MAXQUOTAS];
struct dquot *transfer_to[MAXQUOTAS];
struct tty_struct *tty = current->tty;
short cnt, disc;
+ int error = -EDQUOT;

+ if (!inode)
+ return -ENOENT;
/*
* Find out if this filesystem uses i_blocks.
*/
- if (inode->i_blksize == 0)
- blocks = isize_to_blocks(inode->i_size, BLOCK_SIZE);
+ if (!inode->i_sb || !inode->i_sb->s_blocksize)
+ blocks = isize_to_blocks(inode->i_size, BLOCK_SIZE_BITS);
else
- blocks = (inode->i_blocks / 2);
+ blocks = (inode->i_blocks >> 1);

/*
* Build the transfer_from and transfer_to lists and check quotas to see
@@ -1112,27 +1248,60 @@
case USRQUOTA:
if (inode->i_uid == iattr->ia_uid)
continue;
- transfer_from[cnt] = dqget(inode->i_dev, (direction) ? iattr->ia_uid : inode->i_uid, cnt);
- transfer_to[cnt] = dqget(inode->i_dev, (direction) ? inode->i_uid : iattr->ia_uid, cnt);
+ /* We can get transfer_from from inode, can't we? */
+ transfer_from[cnt] = dqget(inode->i_dev, inode->i_uid, cnt);
+ transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_uid, cnt);
break;
case GRPQUOTA:
if (inode->i_gid == iattr->ia_gid)
continue;
- transfer_from[cnt] = dqget(inode->i_dev, (direction) ? iattr->ia_gid : inode->i_gid, cnt);
- transfer_to[cnt] = dqget(inode->i_dev, (direction) ? inode->i_gid : iattr->ia_gid, cnt);
+ transfer_from[cnt] = dqget(inode->i_dev, inode->i_gid, cnt);
+ transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_gid, cnt);
break;
}

- if (check_idq(transfer_to[cnt], cnt, 1, initiator, tty) == NO_QUOTA ||
- check_bdq(transfer_to[cnt], cnt, blocks, initiator, tty, 0) == NO_QUOTA) {
- for (disc = 0; disc <= cnt; disc++) {
- dqput(transfer_from[disc]);
- dqput(transfer_to[disc]);
+ /* Something bad (eg. quotaoff) happened while we were sleeping? */
+ if (transfer_from[cnt] == NODQUOT || transfer_to[cnt] == NODQUOT)
+ {
+ if (transfer_from[cnt] != NODQUOT) {
+ dqput(transfer_from[cnt]);
+ transfer_from[cnt] = NODQUOT;
}
- return(NO_QUOTA);
+ if (transfer_to[cnt] != NODQUOT) {
+ dqput(transfer_to[cnt]);
+ transfer_to[cnt] = NODQUOT;
+ }
+ continue;
+ }
+ /*
+ * We have to lock the quotas to prevent races...
+ */
+ if (transfer_from[cnt] < transfer_to[cnt])
+ {
+ lock_dquot(transfer_from[cnt]);
+ lock_dquot(transfer_to[cnt]);
+ }
+ else
+ {
+ lock_dquot(transfer_to[cnt]);
+ lock_dquot(transfer_from[cnt]);
+ }
+
+ /*
+ * The entries might got invalidated while locking. The second
+ * dqget() could block and so the first structure might got
+ * invalidated or locked...
+ */
+ if (!transfer_to[cnt]->dq_mnt || !transfer_from[cnt]->dq_mnt ||
+ check_idq(transfer_to[cnt], cnt, 1, initiator, tty) == NO_QUOTA ||
+ check_bdq(transfer_to[cnt], cnt, blocks, initiator, tty, 0) == NO_QUOTA) {
+ cnt++;
+ goto put_all;
}
}

+ if ((error = notify_change(dentry, iattr)))
+ goto put_all;
/*
* Finally perform the needed transfer from transfer_from to transfer_to,
* and release any pointers to dquots not needed anymore.
@@ -1144,28 +1313,39 @@
if (transfer_from[cnt] == NODQUOT && transfer_to[cnt] == NODQUOT)
continue;

- if (transfer_from[cnt] != NODQUOT) {
- dquot_decr_inodes(transfer_from[cnt], 1);
- dquot_decr_blocks(transfer_from[cnt], blocks);
- }
+ dquot_decr_inodes(transfer_from[cnt], 1);
+ dquot_decr_blocks(transfer_from[cnt], blocks);

- if (transfer_to[cnt] != NODQUOT) {
- dquot_incr_inodes(transfer_to[cnt], 1);
- dquot_incr_blocks(transfer_to[cnt], blocks);
- }
+ dquot_incr_inodes(transfer_to[cnt], 1);
+ dquot_incr_blocks(transfer_to[cnt], blocks);

+ unlock_dquot(transfer_from[cnt]);
+ dqput(transfer_from[cnt]);
if (inode->i_dquot[cnt] != NODQUOT) {
struct dquot *temp = inode->i_dquot[cnt];
inode->i_dquot[cnt] = transfer_to[cnt];
+ unlock_dquot(transfer_to[cnt]);
dqput(temp);
- dqput(transfer_from[cnt]);
} else {
- dqput(transfer_from[cnt]);
+ unlock_dquot(transfer_to[cnt]);
dqput(transfer_to[cnt]);
}
}

- return(QUOTA_OK);
+ return 0;
+put_all:
+ for (disc = 0; disc < cnt; disc++) {
+ /* There should be none or both pointers set but... */
+ if (transfer_to[disc] != NODQUOT) {
+ unlock_dquot(transfer_to[disc]);
+ dqput(transfer_to[disc]);
+ }
+ if (transfer_from[disc] != NODQUOT) {
+ unlock_dquot(transfer_from[disc]);
+ dqput(transfer_from[disc]);
+ }
+ }
+ return error;
}


@@ -1221,6 +1401,9 @@
}
}

+/* Function in inode.c - remove pointers to dquots in icache */
+extern void remove_dquot_ref(kdev_t, short);
+
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
@@ -1229,48 +1412,57 @@
struct vfsmount *vfsmnt;
struct file *filp;
short cnt;
+ int enabled = 0;
+
+ /* We don't need to search for vfsmnt each time - umount has to wait for us */
+ vfsmnt = lookup_vfsmnt(dev);
+ if (!vfsmnt || !vfsmnt->mnt_sb)
+ goto out;

+ /* We need to serialize quota_off() for device */
+ down(&vfsmnt->mnt_dquot.dqoff_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
-
- vfsmnt = lookup_vfsmnt(dev);
- if (!vfsmnt)
- goto out;
- if (!vfsmnt->mnt_sb)
- goto out;
if (!is_enabled(vfsmnt, cnt))
continue;
reset_enable_flags(vfsmnt, cnt);

/* Note: these are blocking operations */
- reset_dquot_ptrs(dev, cnt);
+ remove_dquot_ref(dev, cnt);
invalidate_dquots(dev, cnt);

+ /* Wait for any pending IO - remove me as soon as invalidate is more polite */
+ down(&vfsmnt->mnt_dquot.dqio_sem);
filp = vfsmnt->mnt_dquot.files[cnt];
vfsmnt->mnt_dquot.files[cnt] = (struct file *)NULL;
vfsmnt->mnt_dquot.inode_expire[cnt] = 0;
vfsmnt->mnt_dquot.block_expire[cnt] = 0;
+ up(&vfsmnt->mnt_dquot.dqio_sem);
fput(filp);
- }
+ }

/*
* Check whether any quota is still enabled,
* and if not clear the dq_op pointer.
*/
- vfsmnt = lookup_vfsmnt(dev);
- if (vfsmnt && vfsmnt->mnt_sb) {
- int enabled = 0;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- enabled |= is_enabled(vfsmnt, cnt);
- if (!enabled)
- vfsmnt->mnt_sb->dq_op = NULL;
- }
-
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ enabled |= is_enabled(vfsmnt, cnt);
+ if (!enabled)
+ vfsmnt->mnt_sb->dq_op = NULL;
+ up(&vfsmnt->mnt_dquot.dqoff_sem);
out:
return(0);
}

+static inline int check_quotafile_size(loff_t size)
+{
+ ulong blocks = size >> BLOCK_SIZE_BITS;
+ size_t off = size & ~(BLOCK_SIZE - 1);
+
+ return !((blocks % sizeof(struct dqblk) + off % sizeof(struct dqblk)) % sizeof(struct dqblk));
+}
+
int quota_on(kdev_t dev, short type, char *path)
{
struct file *f;
@@ -1287,47 +1479,52 @@

if (is_enabled(vfsmnt, type))
return -EBUSY;
- mnt_dquot = &vfsmnt->mnt_dquot;

+ mnt_dquot = &vfsmnt->mnt_dquot;
+ down(&mnt_dquot->dqoff_sem);
tmp = getname(path);
error = PTR_ERR(tmp);
if (IS_ERR(tmp))
- return error;
+ goto out_lock;

f = filp_open(tmp, O_RDWR, 0600);
putname(tmp);
- if (IS_ERR(f))
- return PTR_ERR(f);

- /* sanity checks */
+ error = PTR_ERR(f);
+ if (IS_ERR(f))
+ goto out_lock;
error = -EIO;
if (!f->f_op->read && !f->f_op->write)
- goto cleanup;
+ goto out_f;
inode = f->f_dentry->d_inode;
error = -EACCES;
if (!S_ISREG(inode->i_mode))
- goto cleanup;
+ goto out_f;
error = -EINVAL;
- if (inode->i_size == 0 || (inode->i_size % sizeof(struct dqblk)) != 0)
- goto cleanup;
+ if (inode->i_size == 0 || !check_quotafile_size(inode->i_size))
+ goto out_f;
+ dquot_drop(inode); /* We don't want quota on quota files */

- /* OK, there we go */
set_enable_flags(vfsmnt, type);
mnt_dquot->files[type] = f;

dquot = dqget(dev, 0, type);
- mnt_dquot->inode_expire[type] = (dquot) ? dquot->dq_itime : MAX_IQ_TIME;
- mnt_dquot->block_expire[type] = (dquot) ? dquot->dq_btime : MAX_DQ_TIME;
+ mnt_dquot->inode_expire[type] = (dquot != NODQUOT) ? dquot->dq_itime : MAX_IQ_TIME;
+ mnt_dquot->block_expire[type] = (dquot != NODQUOT) ? dquot->dq_btime : MAX_DQ_TIME;
dqput(dquot);

vfsmnt->mnt_sb->dq_op = &dquot_operations;
add_dquot_ref(dev, type);

- return(0);
+ up(&mnt_dquot->dqoff_sem);
+ return 0;

-cleanup:
- fput(f);
- return error;
+out_f:
+ filp_close(f, NULL);
+out_lock:
+ up(&mnt_dquot->dqoff_sem);
+
+ return error;
}

/*
@@ -1348,6 +1545,9 @@

if ((u_int) type >= MAXQUOTAS)
goto out;
+ if (id & ~0xFFFF)
+ goto out;
+
ret = -EPERM;
switch (cmds) {
case Q_SYNC:
@@ -1355,7 +1555,7 @@
break;
case Q_GETQUOTA:
if (((type == USRQUOTA && current->euid != id) ||
- (type == GRPQUOTA && current->egid != id)) &&
+ (type == GRPQUOTA && in_group_p(id))) &&
!capable(CAP_SYS_RESOURCE))
goto out;
break;
@@ -1365,7 +1565,7 @@
}

ret = -EINVAL;
- dev = 0;
+ dev = NODEV;
if (special != NULL || (cmds != Q_SYNC && cmds != Q_GETSTATS)) {
mode_t mode;
struct dentry * dentry;
--- linux/CREDITS Sat Nov 6 19:19:43 1999
+++ linux/CREDITS Wed Nov 10 17:52:15 1999
@@ -1046,6 +1046,17 @@
S: 8103 Rein
S: Austria

+N: Jan Kara
+E: jack@atrey.karlin.mff.cuni.cz
+E: jack@suse.cz
+D: Quota fixes for 2.2 kernel
+D: Quota fixes for 2.3 kernel
+D: Few other fixes in filesystem area (buffer cache, isofs, loopback)
+W: http://atrey.karlin.mff.cuni.cz/~jack/
+S: Krosenska' 543
+S: 181 00 Praha 8
+S: Czech Republic
+
N: Jan "Yenya" Kasprzak
E: kas@fi.muni.cz
D: Author of the COSA/SRP sync serial board driver.

--YZ5djTAD1cGYuMQK--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/