gfs2: Fix lru_count accounting
author Andreas Gruenbacher <agruenba@redhat.com>
Tue, 9 Apr 2024 11:54:26 +0000 (13:54 +0200)
committer Andreas Gruenbacher <agruenba@redhat.com>
Wed, 24 Apr 2024 17:46:38 +0000 (19:46 +0200)
Currently, gfs2_scan_glock_lru() decrements lru_count when a glock is
moved onto the dispose list.  When such a glock is then stolen from the
dispose list while gfs2_dispose_glock_lru() doesn't hold the lru_lock,
lru_count will be decremented again, so the counter will eventually go
negative.
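
For illustration, the arithmetic of the bug can be modeled in a few
lines of user-space C.  The names below (scan_moves_to_dispose(),
concurrent_steal(), lru_flag) are made up for this sketch and are not
the gfs2 functions; the point is only that the counter is decremented
once when the glock leaves the lru_list and a second time when the
still-flagged glock is stolen:

  #include <stdio.h>
  #include <stdbool.h>

  static int lru_count = 1;             /* one glock on the LRU */

  struct glock {
          bool lru_flag;                /* stands in for GLF_LRU */
  };

  /* Models the old scan step: the glock moves onto the dispose list
   * and lru_count is decremented, but the LRU flag is left set. */
  static void scan_moves_to_dispose(struct glock *gl)
  {
          (void)gl;                     /* still flagged as on the LRU */
          lru_count--;                  /* first decrement */
  }

  /* Models a concurrent path that steals the glock while the dispose
   * code does not hold the lru_lock: it trusts the flag and decrements
   * lru_count again for the same glock. */
  static void concurrent_steal(struct glock *gl)
  {
          if (gl->lru_flag) {
                  gl->lru_flag = false;
                  lru_count--;          /* second decrement */
          }
  }

  int main(void)
  {
          struct glock gl = { .lru_flag = true };

          scan_moves_to_dispose(&gl);   /* lru_count: 1 -> 0  */
          concurrent_steal(&gl);        /* lru_count: 0 -> -1 */
          printf("lru_count = %d\n", lru_count);
          return 0;
  }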

This bug has existed in one form or another since at least commit
97cc1025b1a91 ("GFS2: Kill two daemons with one patch").

Fix this by only decrementing lru_count when we actually remove a glock
and schedule it to be unlocked and dropped.  We also don't need to
remove and then re-add glocks when we can just as well move them back
onto the lru_list when necessary.
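
In the toy model above, the fix amounts to updating the flag and the
counter together, and only at the point where a glock is actually
removed for disposal; a candidate that cannot be freed is simply moved
back, so there is nothing to undo.  Continuing the sketch (again, not
the kernel code):

  /* The one and only decrement happens when the glock is really
   * removed and queued for unlock/drop. */
  static void dispose_removes(struct glock *gl)
  {
          gl->lru_flag = false;         /* flag and counter change together */
          lru_count--;                  /* the only decrement */
  }

  /* A glock that cannot be freed right now is just moved back onto the
   * lru_list; neither the flag nor the counter changed, so there is
   * nothing to re-add or re-increment. */
  static void dispose_puts_back(struct glock *gl)
  {
          (void)gl;                     /* list_move() back, no accounting */
  }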

In addition, return the number of glocks freed, as we should, rather
than the number of glocks moved onto the dispose list.
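
For context, the freed count matters because gfs2_scan_glock_lru() is
what the glock shrinker's scan callback hands back to the memory
reclaim core, and that interface expects the number of objects actually
freed.  Roughly, from the existing caller (quoted from memory and
slightly simplified; it is not part of the diff below):

  static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
                                              struct shrink_control *sc)
  {
          if (!(sc->gfp_mask & __GFP_FS))
                  return SHRINK_STOP;
          /* now the number of glocks actually freed, not just moved */
          return gfs2_scan_glock_lru(sc->nr_to_scan);
  }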

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
fs/gfs2/glock.c

index 04e0a8ac61d7bd4695328e79453cf4e6071ca98b..8e6936b794605eb72eea64175086de3fbc56b7e8 100644
@@ -2009,29 +2009,30 @@ static bool can_free_glock(struct gfs2_glock *gl)
  * private)
  */
 
-static void gfs2_dispose_glock_lru(struct list_head *list)
+static unsigned long gfs2_dispose_glock_lru(struct list_head *list)
 __releases(&lru_lock)
 __acquires(&lru_lock)
 {
        struct gfs2_glock *gl;
+       unsigned long freed = 0;
 
        list_sort(NULL, list, glock_cmp);
 
        while(!list_empty(list)) {
                gl = list_first_entry(list, struct gfs2_glock, gl_lru);
-               list_del_init(&gl->gl_lru);
-               clear_bit(GLF_LRU, &gl->gl_flags);
                if (!spin_trylock(&gl->gl_lockref.lock)) {
 add_back_to_lru:
-                       list_add(&gl->gl_lru, &lru_list);
-                       set_bit(GLF_LRU, &gl->gl_flags);
-                       atomic_inc(&lru_count);
+                       list_move(&gl->gl_lru, &lru_list);
                        continue;
                }
                if (!can_free_glock(gl)) {
                        spin_unlock(&gl->gl_lockref.lock);
                        goto add_back_to_lru;
                }
+               list_del_init(&gl->gl_lru);
+               atomic_dec(&lru_count);
+               clear_bit(GLF_LRU, &gl->gl_flags);
+               freed++;
                gl->gl_lockref.count++;
                if (demote_ok(gl))
                        handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -2039,6 +2040,7 @@ add_back_to_lru:
                spin_unlock(&gl->gl_lockref.lock);
                cond_resched_lock(&lru_lock);
        }
+       return freed;
 }
 
 /**
@@ -2050,24 +2052,21 @@ add_back_to_lru:
  * gfs2_dispose_glock_lru() above.
  */
 
-static long gfs2_scan_glock_lru(int nr)
+static unsigned long gfs2_scan_glock_lru(unsigned long nr)
 {
        struct gfs2_glock *gl, *next;
        LIST_HEAD(dispose);
-       long freed = 0;
+       unsigned long freed = 0;
 
        spin_lock(&lru_lock);
        list_for_each_entry_safe(gl, next, &lru_list, gl_lru) {
-               if (nr-- <= 0)
+               if (!nr--)
                        break;
-               if (can_free_glock(gl)) {
+               if (can_free_glock(gl))
                        list_move(&gl->gl_lru, &dispose);
-                       atomic_dec(&lru_count);
-                       freed++;
-               }
        }
        if (!list_empty(&dispose))
-               gfs2_dispose_glock_lru(&dispose);
+               freed = gfs2_dispose_glock_lru(&dispose);
        spin_unlock(&lru_lock);
 
        return freed;