Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
From: Tim Chen
Date: Mon Jun 14 2010 - 21:28:26 EST
Andrew,
I have tweaked your patch a bit and put in your suggestion of
implementing a percpu_counter_compare (see below), which allows
for accurate but fast comparisons. This is just meant for
discussion, so I have not broken it into two patches (the
percpu_counter part and the shmem part).
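For anyone following along, the reasoning behind the comparison: the
approximate count in fbc->count can drift from the true total by at most
percpu_counter_batch * num_online_cpus(), since each CPU caches less than
one batch of unflushed updates. So whenever the margin of the comparison
exceeds that bound, the cheap lockless read is already decisive, and only
close calls pay for the exact percpu_counter_sum(). A quick user-space
model of that bound (the numbers are made up for illustration):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		long batch = 32, nr_cpus = 4;
		long slack = batch * nr_cpus;	/* worst-case drift: 128 */
		long approx = 1000, rhs = 500;	/* margin 500 > 128 */

		if (labs(approx - rhs) > slack)
			printf("decided from the approximate count alone\n");
		else
			printf("margin <= %ld, need the exact sum\n", slack);
		return 0;
	}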
One thing still bothers me with this approach: when we remount
tmpfs, we cannot lock the percpu_counter, so we cannot guarantee
that it won't be updated after we read it. We could therefore
overshoot the new quota after a remount, or miss accounting for
pages being returned while we remount. Is this tolerable?
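To spell out the window I mean (a sketch only; the CPU assignment is
arbitrary):

	CPU 0 (shmem_remount_fs)              CPU 1 (shmem_free_blocks)
	------------------------              -------------------------
	blocks = max_blocks -
	  percpu_counter_sum(&free_blocks);
	                                      percpu_counter_add(&free_blocks,
	                                                         pages);
	percpu_counter_init(&free_blocks,
	  config.max_blocks - blocks);
	  (the pages freed on CPU 1 were not in the sum,
	   so they are lost from the new counter)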
My previous qtoken implementation used a special value (-1) to
denote that the per-cpu cache is disabled, and synchronized access
via a lock on the whole counter, so I didn't have to worry about
the count being inaccurate. This facility to lock access and freeze
counter updates is not available in the current percpu_counter
implementation.
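In case it helps the discussion, this is roughly the freeze operation I
had in mind (a from-memory sketch with made-up names, not the actual
qtoken code; a real version would also need to handle CPU hotplug):

	/* Flush every per-cpu cache into the global count and mark the
	 * caches disabled with the -1 sentinel.  While fbc->lock is held,
	 * all updates take the locked slow path, so fbc->count is exact. */
	static void percpu_counter_freeze(struct percpu_counter *fbc)
	{
		int cpu;

		spin_lock(&fbc->lock);
		for_each_online_cpu(cpu) {
			s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
			fbc->count += *pcount;	/* drain cached delta */
			*pcount = -1;		/* sentinel: cache disabled */
		}
		/* caller holds fbc->lock until done reading/resizing */
	}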
Tim
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index c88d67b..8a7d510 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
@@ -98,6 +99,16 @@ static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
	fbc->count = amount;
}
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	if (fbc->count > rhs)
+		return 1;
+	else if (fbc->count < rhs)
+		return -1;
+	else
+		return 0;
+}
+
static inline void
percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e164291..d01d69e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -3,6 +3,7 @@
#include <linux/swap.h>
#include <linux/mempolicy.h>
+#include <linux/percpu_counter.h>
/* inode in-kernel data */
@@ -23,7 +24,7 @@ struct shmem_inode_info {
struct shmem_sb_info {
	unsigned long max_blocks;	/* How many blocks are allowed */
-	unsigned long free_blocks;	/* How many are left for allocation */
+	struct percpu_counter free_blocks;  /* How many are left for allocation */
	unsigned long max_inodes;	/* How many inodes are allowed */
	unsigned long free_inodes;	/* How many are left for allocation */
	spinlock_t stat_lock;		/* Serialize shmem_sb_info changes */
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d7..583841d 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
	return NOTIFY_OK;
}
+/*
+ * Compare counter against given value.
+ * Return 1 if greater, 0 if equal and -1 if less
+ */
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	s64 count;
+
+	count = percpu_counter_read(fbc);
+	/* Check to see if approx count will be sufficient for comparison */
+	if (abs(count - rhs) > (percpu_counter_batch * num_online_cpus())) {
+		if (count > rhs)
+			return 1;
+		else
+			return -1;
+	}
+	/* Need to use more precise count */
+	count = percpu_counter_sum(fbc);
+	if (count > rhs)
+		return 1;
+	else if (count < rhs)
+		return -1;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
+
static int __init percpu_counter_startup(void)
{
compute_batch_value();
diff --git a/mm/shmem.c b/mm/shmem.c
index eef4ebe..7cd4f24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -28,6 +28,7 @@
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/percpu_counter.h>
#include <linux/swap.h>
static struct vfsmount *shm_mnt;
@@ -233,8 +234,8 @@ static void shmem_free_blocks(struct inode *inode, long pages)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	if (sbinfo->max_blocks) {
+		percpu_counter_add(&sbinfo->free_blocks, pages);
		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += pages;
		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);
	}
@@ -422,11 +423,11 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
	 */
	if (sbinfo->max_blocks) {
		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks <= 1) {
+		if (percpu_counter_compare(&sbinfo->free_blocks, 1) <= 0) {
			spin_unlock(&sbinfo->stat_lock);
			return ERR_PTR(-ENOSPC);
		}
-		sbinfo->free_blocks--;
+		percpu_counter_dec(&sbinfo->free_blocks);
		inode->i_blocks += BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);
	}
@@ -1386,14 +1387,14 @@ repeat:
		sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo->max_blocks) {
			spin_lock(&sbinfo->stat_lock);
-			if (sbinfo->free_blocks == 0 ||
+			if ((percpu_counter_compare(&sbinfo->free_blocks, 0) <= 0) ||
			    shmem_acct_block(info->flags)) {
				spin_unlock(&sbinfo->stat_lock);
				spin_unlock(&info->lock);
				error = -ENOSPC;
				goto failed;
			}
-			sbinfo->free_blocks--;
+			percpu_counter_dec(&sbinfo->free_blocks);
			inode->i_blocks += BLOCKS_PER_PAGE;
			spin_unlock(&sbinfo->stat_lock);
		} else if (shmem_acct_block(info->flags)) {
@@ -1794,7 +1795,8 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
	spin_lock(&sbinfo->stat_lock);
	if (sbinfo->max_blocks) {
		buf->f_blocks = sbinfo->max_blocks;
-		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+		buf->f_bavail = buf->f_bfree =
+			percpu_counter_sum(&sbinfo->free_blocks);
	}
	if (sbinfo->max_inodes) {
		buf->f_files = sbinfo->max_inodes;
@@ -2258,7 +2260,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
		return error;
	spin_lock(&sbinfo->stat_lock);
-	blocks = sbinfo->max_blocks - sbinfo->free_blocks;
+	blocks = sbinfo->max_blocks - percpu_counter_sum(&sbinfo->free_blocks);
	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
	if (config.max_blocks < blocks)
		goto out;
@@ -2277,7 +2279,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
	error = 0;
	sbinfo->max_blocks = config.max_blocks;
-	sbinfo->free_blocks = config.max_blocks - blocks;
+	percpu_counter_init(&sbinfo->free_blocks, config.max_blocks - blocks);
	sbinfo->max_inodes = config.max_inodes;
	sbinfo->free_inodes = config.max_inodes - inodes;
@@ -2352,7 +2354,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
#endif
	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->free_blocks = sbinfo->max_blocks;
+	percpu_counter_init(&sbinfo->free_blocks, sbinfo->max_blocks);
	sbinfo->free_inodes = sbinfo->max_inodes;
	sb->s_maxbytes = SHMEM_MAX_BYTES;