NFSv4: fairly test all delegations on a SEQ4_ revocation
author Benjamin Coddington <bcodding@redhat.com>
Thu, 24 Aug 2023 18:52:19 +0000 (14:52 -0400)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Wed, 1 Nov 2023 19:15:52 +0000 (15:15 -0400)
When the client is required to use TEST_STATEID to discover which
delegation(s) have been revoked, it may continually test delegations at the
head of the list if the server continues to be unsatisfied and keeps sending
SEQ4_STATUS_RECALLABLE_STATE_REVOKED.  For a large number of delegations
this behavior is prone to live-lock because the client may never be able to
test and free revoked state at the end of the list since the
SEQ4_STATUS_RECALLABLE_STATE_REVOKED will cause us to flag delegations at
the head of the list to be tested.  This problem is further exacerbated by
the state manager's willingness to be scheduled out on a busy system while
testing the list of delegations.

Keep a generation counter for each attempt to test all delegations, and
skip delegations that have already been tested in the current pass.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Tested-by: Torkil Svensgaard <torkil@drcmr.dk>
Tested-by: Ruben Vestergaard <rubenv@drcmr.dk>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/delegation.c
fs/nfs/delegation.h
include/linux/nfs_fs_sb.h

index cf7365581031b5c4442956a6643572580abc9abc..fa1a14def45cea2fc485b598831becc9e7497992 100644 (file)
@@ -448,6 +448,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
        delegation->cred = get_cred(cred);
        delegation->inode = inode;
        delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
+       delegation->test_gen = 0;
        spin_lock_init(&delegation->lock);
 
        spin_lock(&clp->cl_lock);
@@ -1294,6 +1295,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
        struct inode *inode;
        const struct cred *cred;
        nfs4_stateid stateid;
+       unsigned long gen = ++server->delegation_gen;
+
 restart:
        rcu_read_lock();
 restart_locked:
@@ -1303,7 +1306,8 @@ restart_locked:
                    test_bit(NFS_DELEGATION_RETURNING,
                                        &delegation->flags) ||
                    test_bit(NFS_DELEGATION_TEST_EXPIRED,
-                                       &delegation->flags) == 0)
+                                       &delegation->flags) == 0 ||
+                       delegation->test_gen == gen)
                        continue;
                inode = nfs_delegation_grab_inode(delegation);
                if (inode == NULL)
@@ -1312,6 +1316,7 @@ restart_locked:
                cred = get_cred_rcu(delegation->cred);
                nfs4_stateid_copy(&stateid, &delegation->stateid);
                spin_unlock(&delegation->lock);
+               delegation->test_gen = gen;
                clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
                rcu_read_unlock();
                nfs_delegation_test_free_expired(inode, &stateid, cred);
index 1c378992b7c0fce3be940398f5ced5ac3e36c7c4..a6f495d012cf1156700813baec8b1c7f995c5723 100644 (file)
@@ -21,6 +21,7 @@ struct nfs_delegation {
        fmode_t type;
        unsigned long pagemod_limit;
        __u64 change_attr;
+       unsigned long test_gen;
        unsigned long flags;
        refcount_t refcount;
        spinlock_t lock;
index cd628c4b011e54fa807674daa8a3361c97c20a5f..cd797e00fe359a91b44b2e012309e87d5b446a7e 100644 (file)
@@ -239,6 +239,7 @@ struct nfs_server {
        struct list_head        delegations;
        struct list_head        ss_copies;
 
+       unsigned long           delegation_gen;
        unsigned long           mig_gen;
        unsigned long           mig_status;
 #define NFS_MIG_IN_TRANSITION          (1)