locking/percpu-rwsem: add freezable alternative to down_read
authorJames Bottomley <James.Bottomley@HansenPartnership.com>
Thu, 27 Mar 2025 14:06:10 +0000 (10:06 -0400)
committerChristian Brauner <brauner@kernel.org>
Mon, 7 Apr 2025 07:37:16 +0000 (09:37 +0200)
Percpu-rwsems are used for superblock locking.  However, we know the
read percpu-rwsem we take for sb_start_write() on a frozen filesystem
must not inhibit the system from suspending or hibernating.  That
means it needs to wait with TASK_UNINTERRUPTIBLE | TASK_FREEZABLE.

Introduce a new percpu_down_read_freezable() that allows us to control
whether TASK_FREEZABLE is added to the wait flags.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Link: https://lore.kernel.org/r/20250327140613.25178-2-James.Bottomley@HansenPartnership.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
include/linux/percpu-rwsem.h
kernel/locking/percpu-rwsem.c

index af7d75ede61979aa2b28d6242f597077cbefdb4b..288f5235649a5a0877090a806f41508f3ea41c5c 100644 (file)
@@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = {                                \
 #define DEFINE_STATIC_PERCPU_RWSEM(name)       \
        __DEFINE_PERCPU_RWSEM(name, static)
 
-extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem,
+                                            bool freezable)
 {
        might_sleep();
 
@@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
        if (likely(rcu_sync_is_idle(&sem->rss)))
                this_cpu_inc(*sem->read_count);
        else
-               __percpu_down_read(sem, false); /* Unconditional memory barrier */
+               __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */
        /*
         * The preempt_enable() prevents the compiler from
         * bleeding the critical section out.
@@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
        preempt_enable();
 }
 
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+       percpu_down_read_internal(sem, false);
+}
+
+static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem,
+                                             bool freeze)
+{
+       percpu_down_read_internal(sem, freeze);
+}
+
 static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
        bool ret = true;
@@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
        if (likely(rcu_sync_is_idle(&sem->rss)))
                this_cpu_inc(*sem->read_count);
        else
-               ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+               ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */
        preempt_enable();
        /*
         * The barrier() from preempt_enable() prevents the compiler from
index d6964fc29f513997f906a07ff68118ae50520097..ef234469baacab67107c08fc3a53ca1c65543d4f 100644 (file)
@@ -138,7 +138,8 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
        return !reader; /* wake (readers until) 1 writer */
 }
 
-static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader,
+                             bool freeze)
 {
        DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
        bool wait;
@@ -156,7 +157,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
        spin_unlock_irq(&sem->waiters.lock);
 
        while (wait) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
+               set_current_state(TASK_UNINTERRUPTIBLE |
+                                 (freeze ? TASK_FREEZABLE : 0));
                if (!smp_load_acquire(&wq_entry.private))
                        break;
                schedule();
@@ -164,7 +166,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
        __set_current_state(TASK_RUNNING);
 }
 
-bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try,
+                               bool freeze)
 {
        if (__percpu_down_read_trylock(sem))
                return true;
@@ -174,7 +177,7 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
 
        trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
        preempt_enable();
-       percpu_rwsem_wait(sem, /* .reader = */ true);
+       percpu_rwsem_wait(sem, /* .reader = */ true, freeze);
        preempt_disable();
        trace_contention_end(sem, 0);
 
@@ -237,7 +240,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
         */
        if (!__percpu_down_write_trylock(sem)) {
                trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
-               percpu_rwsem_wait(sem, /* .reader = */ false);
+               percpu_rwsem_wait(sem, /* .reader = */ false, false);
                contended = true;
        }