[PATCH 02/19] staging/lustre/obdclass: reorganize busy object accounting

From: green
Date: Mon Sep 14 2015 - 18:47:13 EST


From: Frank Zago <fzago@xxxxxxxx>

Due to some accounting bug, lsb_busy of a hash bucket can become
larger than the total number of objects in said bucket. A busy object
can be counted more than once. When that happens, a negative value is
returned by the shrinker callback.

Instead of trying (and failing) to count the busy objects, count the
objects that are not busy, i.e. the objects that are present on the
lsb_lru list. The number of busy objects is then the difference
between the number of objects in the hash and the objects on the
lsb_lru list.

Signed-off-by: frank zago <fzago@xxxxxxxx>
Reviewed-on: http://review.whamcloud.com/12468
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5722
Reviewed-by: Andreas Dilger <andreas.dilger@xxxxxxxxx>
Reviewed-by: Mike Pershin <mike.pershin@xxxxxxxxx>
Signed-off-by: Oleg Drokin <oleg.drokin@xxxxxxxxx>
---
drivers/staging/lustre/lustre/include/lu_object.h | 4 +--
drivers/staging/lustre/lustre/obdclass/lu_object.c | 35 +++++++++-------------
2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index a16c9ea..ea13a82 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -554,9 +554,9 @@ struct fld;

struct lu_site_bkt_data {
/**
- * number of busy object on this bucket
+ * number of objects in this bucket on the lsb_lru list.
*/
- long lsb_busy;
+ long lsb_lru_len;
/**
* LRU list, updated on each access to object. Protected by
* bucket lock of lu_site::ls_obj_hash.
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 3111982..4f7899f 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -113,8 +113,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
return;
}

- LASSERT(bkt->lsb_busy > 0);
- bkt->lsb_busy--;
/*
* When last reference is released, iterate over object
* layers, and notify them that object is no longer busy.
@@ -127,6 +125,7 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
if (!lu_object_is_dying(top)) {
LASSERT(list_empty(&top->loh_lru));
list_add_tail(&top->loh_lru, &bkt->lsb_lru);
+ bkt->lsb_lru_len++;
cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
return;
}
@@ -179,7 +178,13 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
struct cfs_hash_bd bd;

cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1);
+ if (!list_empty(&top->loh_lru)) {
+ struct lu_site_bkt_data *bkt;
+
list_del_init(&top->loh_lru);
+ bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
+ bkt->lsb_lru_len--;
+ }
cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
cfs_hash_bd_unlock(obj_hash, &bd, 1);
}
@@ -349,6 +354,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
cfs_hash_bd_del_locked(s->ls_obj_hash,
&bd2, &h->loh_hash);
list_move(&h->loh_lru, &dispose);
+ bkt->lsb_lru_len--;
if (did_sth == 0)
did_sth = 1;

@@ -561,7 +567,10 @@ static struct lu_object *htable_lookup(struct lu_site *s,
if (likely(!lu_object_is_dying(h))) {
cfs_hash_get(s->ls_obj_hash, hnode);
lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
+ if (!list_empty(&h->loh_lru)) {
list_del_init(&h->loh_lru);
+ bkt->lsb_lru_len--;
+ }
return lu_object_top(h);
}

@@ -599,7 +608,6 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
struct lu_object *o;
struct cfs_hash *hs;
struct cfs_hash_bd bd;
- struct lu_site_bkt_data *bkt;

o = lu_object_alloc(env, dev, f, conf);
if (IS_ERR(o))
@@ -607,9 +615,7 @@ static struct lu_object *lu_object_new(const struct lu_env *env,

hs = dev->ld_site->ls_obj_hash;
cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
- bkt = cfs_hash_bd_extra_get(hs, &bd);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
- bkt->lsb_busy++;
cfs_hash_bd_unlock(hs, &bd, 1);
return o;
}
@@ -675,11 +681,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,

shadow = htable_lookup(s, &bd, f, waiter, &version);
if (likely(PTR_ERR(shadow) == -ENOENT)) {
- struct lu_site_bkt_data *bkt;
-
- bkt = cfs_hash_bd_extra_get(hs, &bd);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
- bkt->lsb_busy++;
cfs_hash_bd_unlock(hs, &bd, 1);
return o;
}
@@ -926,14 +928,7 @@ static void lu_obj_hop_get(struct cfs_hash *hs, struct hlist_node *hnode)
struct lu_object_header *h;

h = hlist_entry(hnode, struct lu_object_header, loh_hash);
- if (atomic_add_return(1, &h->loh_ref) == 1) {
- struct lu_site_bkt_data *bkt;
- struct cfs_hash_bd bd;
-
- cfs_hash_bd_get(hs, &h->loh_fid, &bd);
- bkt = cfs_hash_bd_extra_get(hs, &bd);
- bkt->lsb_busy++;
- }
+ atomic_inc(&h->loh_ref);
}

static void lu_obj_hop_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
@@ -1802,7 +1797,8 @@ static void lu_site_stats_get(struct cfs_hash *hs,
struct hlist_head *hhead;

cfs_hash_bd_lock(hs, &bd, 1);
- stats->lss_busy += bkt->lsb_busy;
+ stats->lss_busy +=
+ cfs_hash_bd_count_get(&bd) - bkt->lsb_lru_len;
stats->lss_total += cfs_hash_bd_count_get(&bd);
stats->lss_max_search = max((int)stats->lss_max_search,
cfs_hash_bd_depmax_get(&bd));
@@ -2067,7 +2063,6 @@ void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
{
struct lu_site *s = o->lo_dev->ld_site;
struct lu_fid *old = &o->lo_header->loh_fid;
- struct lu_site_bkt_data *bkt;
struct lu_object *shadow;
wait_queue_t waiter;
struct cfs_hash *hs;
@@ -2082,9 +2077,7 @@ void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
/* supposed to be unique */
LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT);
*old = *fid;
- bkt = cfs_hash_bd_extra_get(hs, &bd);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
- bkt->lsb_busy++;
cfs_hash_bd_unlock(hs, &bd, 1);
}
EXPORT_SYMBOL(lu_object_assign_fid);
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/