srcu: Use rcu_segcblist to track SRCU callbacks

author Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Mon, 13 Mar 2017 23:48:18 +0000 (16:48 -0700)

committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Tue, 18 Apr 2017 18:38:20 +0000 (11:38 -0700)
author Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Mon, 13 Mar 2017 23:48:18 +0000 (16:48 -0700)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tue, 18 Apr 2017 18:38:20 +0000 (11:38 -0700)
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h

new file mode 100644 (file)

index 0000000..74b1e72
--- /dev/null
+++ b/include/linux/rcu_segcblist.h
@@ -0,0 +1,678 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __KERNEL_RCU_SEGCBLIST_H
+#define __KERNEL_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+       struct rcu_head *head;  /* First callback, NULL when list is empty. */
+       struct rcu_head **tail; /* Last ->next pointer, &head when empty. */
+       long len;               /* Number of callbacks. */
+       long len_lazy;          /* Number of lazy callbacks. */
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &(n).head }
+
+/* Initialize simple callback list. */
+static inline void rcu_cblist_init(struct rcu_cblist *rclp)
+{
+       rclp->head = NULL;
+       rclp->tail = &rclp->head;
+       rclp->len = 0;
+       rclp->len_lazy = 0;
+}
+
+/* Is simple callback list empty? */
+static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
+{
+       return !rclp->head;
+}
+
+/* Return number of callbacks in simple callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+       return rclp->len;
+}
+
+/* Return number of lazy callbacks in simple callback list. */
+static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
+{
+       return rclp->len_lazy;
+}
+
+/*
+ * Debug function to actually count the number of callbacks by walking the
+ * list.  Returns that count, or -1 if it exceeds the specified limit.
+ */
+static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
+{
+       long cnt = 0; /* Same width as lim and the return type. */
+       struct rcu_head **rhpp = &rclp->head;
+
+       for (;;) {
+               if (!*rhpp)
+                       return cnt;
+               if (++cnt > lim)
+                       return -1;
+               rhpp = &(*rhpp)->next;
+       }
+}
+
+/*
+ * Dequeue the oldest rcu_head structure from the specified callback
+ * list.  This function assumes that the callback is non-lazy, but
+ * the caller can later invoke rcu_cblist_dequeued_lazy() if it
+ * finds otherwise (and if it cares about laziness).  This allows
+ * different users to have different ways of determining laziness.
+ */
+static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
+{
+       struct rcu_head *rhp;
+
+       rhp = rclp->head;
+       if (!rhp)
+               return NULL;
+       rclp->len--;
+       rclp->head = rhp->next;
+       if (!rclp->head)
+               rclp->tail = &rclp->head;
+       return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
+{
+       rclp->len_lazy--;
+}
+
+/*
+ * Interim function to return rcu_cblist head pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
+{
+       return rclp->head;
+}
+
+/*
+ * Interim function to return rcu_cblist tail pointer.  Longer term, the
+ * rcu_cblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
+{
+       WARN_ON_ONCE(rcu_cblist_empty(rclp));
+       return rclp->tail;
+}
+
+/* Complicated segmented callback lists.  ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ *     Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ *     Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ *     Callbacks that arrived before the next GP started, again from
+ *     the current CPU's viewpoint.  These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ *     Callbacks that might have arrived after the next GP started.
+ *     There is some uncertainty as to when a given GP starts and
+ *     ends, but a CPU knows the exact times if it is the one starting
+ *     or ending the GP.  Other CPUs know that the previous GP ends
+ *     before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke.  A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL          3
+#define RCU_CBLIST_NSEGS       4
+
+struct rcu_segcblist {
+       struct rcu_head *head;                     /* First callback. */
+       struct rcu_head **tails[RCU_CBLIST_NSEGS]; /* End of each segment. */
+       unsigned long gp_seq[RCU_CBLIST_NSEGS];    /* GP number per segment. */
+       long len;                                  /* Number of callbacks. */
+       long len_lazy;                             /* Number of lazy ones. */
+};
+
+#define RCU_SEGCBLIST_INITIALIZER(n) \
+{ \
+       .head = NULL, \
+       .tails[RCU_DONE_TAIL] = &(n).head, \
+       .tails[RCU_WAIT_TAIL] = &(n).head, \
+       .tails[RCU_NEXT_READY_TAIL] = &(n).head, \
+       .tails[RCU_NEXT_TAIL] = &(n).head, \
+}
+
+/*
+ * Initialize an rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
+{
+       int i;
+
+       BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
+       BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
+       rsclp->head = NULL;
+       for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+               rsclp->tails[i] = &rsclp->head;
+       rsclp->len = 0;
+       rsclp->len_lazy = 0;
+}
+
+/*
+ * Is the specified rcu_segcblist structure empty?
+ *
+ * But careful!  The fact that the ->head field is NULL does not
+ * necessarily imply that there are no callbacks associated with
+ * this structure.  When callbacks are being invoked, they are
+ * removed as a group.  If callback invocation must be preempted,
+ * the remaining callbacks will be added back to the list.  Either
+ * way, the counts are updated later.
+ *
+ * So it is often the case that rcu_segcblist_n_cbs() should be used
+ * instead.
+ */
+static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
+{
+       return !rsclp->head;
+}
+
+/* Return number of callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+{
+       return READ_ONCE(rsclp->len);
+}
+
+/* Return number of lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
+{
+       return rsclp->len_lazy;
+}
+
+/* Return number of non-lazy callbacks in segmented callback list. */
+static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
+{
+       return rsclp->len - rsclp->len_lazy;
+}
+
+/*
+ * Is the specified rcu_segcblist enabled, for example, not corresponding
+ * to an offline or callback-offloaded CPU?
+ */
+static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
+{
+       return !!rsclp->tails[RCU_NEXT_TAIL];
+}
+
+/*
+ * Disable the specified rcu_segcblist structure, so that callbacks can
+ * no longer be posted to it.  This structure must be empty.
+ */
+static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
+{
+       WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+       WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+       WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+       rsclp->tails[RCU_NEXT_TAIL] = NULL;
+}
+
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+       if (seg == RCU_DONE_TAIL)
+               return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+       return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
+/*
+ * Are all segments following the specified segment of the specified
+ * rcu_segcblist structure empty of callbacks?  (The specified
+ * segment might well contain callbacks.)
+ */
+static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
+{
+       return !*rsclp->tails[seg];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are ready to be invoked?
+ */
+static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
+{
+       return rcu_segcblist_is_enabled(rsclp) &&
+              &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * are still pending, that is, not yet ready to be invoked?
+ */
+static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
+{
+       return rcu_segcblist_is_enabled(rsclp) &&
+              !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
+}
+
+/*
+ * Dequeue and return the first ready-to-invoke callback.  If there
+ * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
+ * to avoid interference.  Does not protect from interference from other
+ * CPUs or tasks.
+ */
+static inline struct rcu_head *
+rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
+{
+       unsigned long flags;
+       int i;
+       struct rcu_head *rhp;
+
+       local_irq_save(flags);
+       if (!rcu_segcblist_ready_cbs(rsclp)) {
+               local_irq_restore(flags);
+               return NULL;
+       }
+       rhp = rsclp->head;
+       BUG_ON(!rhp);
+       rsclp->head = rhp->next;
+       for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
+               if (rsclp->tails[i] != &rhp->next)
+                       break;
+               rsclp->tails[i] = &rsclp->head;
+       }
+       smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
+       WRITE_ONCE(rsclp->len, rsclp->len - 1);
+       local_irq_restore(flags);
+       return rhp;
+}
+
+/*
+ * Account for the fact that a previously dequeued callback turned out
+ * to be marked as lazy.
+ */
+static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rsclp->len_lazy--;
+       local_irq_restore(flags);
+}
+
+/*
+ * Return a pointer to the first callback in the specified rcu_segcblist
+ * structure.  This is useful for diagnostics.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
+{
+       if (rcu_segcblist_is_enabled(rsclp))
+               return rsclp->head;
+       return NULL;
+}
+
+/*
+ * Return a pointer to the first pending callback in the specified
+ * rcu_segcblist structure.  This is useful just after posting a given
+ * callback -- if that callback is the first pending callback, then
+ * you cannot rely on someone else having already started up the required
+ * grace period.
+ */
+static inline struct rcu_head *
+rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
+{
+       if (rcu_segcblist_is_enabled(rsclp))
+               return *rsclp->tails[RCU_DONE_TAIL];
+       return NULL;
+}
+
+/*
+ * Does the specified rcu_segcblist structure contain callbacks that
+ * have not yet been processed beyond having been posted, that is,
+ * does it contain callbacks in its last segment?
+ */
+static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
+{
+       return rcu_segcblist_is_enabled(rsclp) &&
+              !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
+}
+
+/*
+ * Enqueue the specified callback onto the specified rcu_segcblist
+ * structure, updating accounting as needed.  Note that the ->len
+ * field may be accessed locklessly, hence the WRITE_ONCE().
+ * The ->len field is used by rcu_barrier() and friends to determine
+ * if it must post a callback on this structure, and it is OK
+ * for rcu_barrier() to sometimes post callbacks needlessly, but
+ * absolutely not OK for it to ever miss posting a callback.
+ */
+static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
+                                        struct rcu_head *rhp, bool lazy)
+{
+       WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+       if (lazy)
+               rsclp->len_lazy++;
+       smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+       rhp->next = NULL;
+       *rsclp->tails[RCU_NEXT_TAIL] = rhp;
+       rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
+}
+
+/*
+ * Extract only the counts from the specified rcu_segcblist structure,
+ * and place them in the specified rcu_cblist structure.  This function
+ * supports both callback orphaning and invocation, hence the separation
+ * of counts and callbacks.  (Callbacks ready for invocation must be
+ * orphaned and adopted separately from pending callbacks, but counts
+ * apply to all callbacks.  Locking must be used to make sure that
+ * both orphaned-callbacks lists are consistent.)
+ */
+static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
+                                              struct rcu_cblist *rclp)
+{
+       rclp->len_lazy += rsclp->len_lazy;
+       rclp->len += rsclp->len;
+       rsclp->len_lazy = 0;
+       WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+}
+
+/*
+ * Extract only those callbacks ready to be invoked from the specified
+ * rcu_segcblist structure and place them in the specified rcu_cblist
+ * structure.
+ */
+static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
+                                                 struct rcu_cblist *rclp)
+{
+       int i;
+
+       if (!rcu_segcblist_ready_cbs(rsclp))
+               return; /* Nothing to do. */
+       *rclp->tail = rsclp->head;
+       rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
+       *rsclp->tails[RCU_DONE_TAIL] = NULL;
+       rclp->tail = rsclp->tails[RCU_DONE_TAIL];
+       for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
+               if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
+                       rsclp->tails[i] = &rsclp->head;
+}
+
+/*
+ * Extract only those callbacks still pending (not yet ready to be
+ * invoked) from the specified rcu_segcblist structure and place them in
+ * the specified rcu_cblist structure.  Note that this loses information
+ * about any callbacks that might have been partway done waiting for
+ * their grace period.  Too bad!  They will have to start over.
+ */
+static inline void
+rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
+                              struct rcu_cblist *rclp)
+{
+       int i;
+
+       if (!rcu_segcblist_pend_cbs(rsclp))
+               return; /* Nothing to do. */
+       *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
+       rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
+       *rsclp->tails[RCU_DONE_TAIL] = NULL;
+       for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+               rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
+}
+
+/*
+ * Move the entire contents of the specified rcu_segcblist structure,
+ * counts, callbacks, and all, to the specified rcu_cblist structure.
+ * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
+ * @@@ Memory barrier needed?  (Not if only used at boot time...)
+ */
+static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
+                                            struct rcu_cblist *rclp)
+{
+       rcu_segcblist_extract_done_cbs(rsclp, rclp);
+       rcu_segcblist_extract_pend_cbs(rsclp, rclp);
+       rcu_segcblist_extract_count(rsclp, rclp);
+}
+
+/*
+ * Insert counts from the specified rcu_cblist structure in the
+ * specified rcu_segcblist structure.
+ */
+static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
+                                             struct rcu_cblist *rclp)
+{
+       rsclp->len_lazy += rclp->len_lazy;
+       /* ->len sampled locklessly. */
+       WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+       rclp->len_lazy = 0;
+       rclp->len = 0;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the beginning of the
+ * done-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
+                                                struct rcu_cblist *rclp)
+{
+       int i;
+
+       if (!rclp->head)
+               return; /* No callbacks to move. */
+       *rclp->tail = rsclp->head;
+       rsclp->head = rclp->head;
+       for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+               if (&rsclp->head == rsclp->tails[i])
+                       rsclp->tails[i] = rclp->tail;
+               else
+                       break;
+       rclp->head = NULL;
+       rclp->tail = &rclp->head;
+}
+
+/*
+ * Move callbacks from the specified rcu_cblist to the end of the
+ * new-callbacks segment of the specified rcu_segcblist.
+ */
+static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
+                                                struct rcu_cblist *rclp)
+{
+       if (!rclp->head)
+               return; /* Nothing to do. */
+       *rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
+       rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
+       rclp->head = NULL;
+       rclp->tail = &rclp->head;
+}
+
+/*
+ * Advance the callbacks in the specified rcu_segcblist structure based
+ * on the current value passed in for the grace-period counter.
+ */
+static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
+                                        unsigned long seq)
+{
+       int i, j;
+
+       WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+       WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+       /*
+        * Find all callbacks whose ->gp_seq numbers indicate that they
+        * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
+        */
+       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+               if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+                       break;
+               rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
+       }
+
+       /* If no callbacks moved, nothing more need be done. */
+       if (i == RCU_WAIT_TAIL)
+               return;
+
+       /* Clean up tail pointers that might have been misordered above. */
+       for (j = RCU_WAIT_TAIL; j < i; j++)
+               rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
+
+       /*
+        * Callbacks moved, so clean up the misordered ->tails[] pointers
+        * that now point into the middle of the list of ready-to-invoke
+        * callbacks.  The overall effect is to copy down the later pointers
+        * into the gap that was created by the now-ready segments.
+        */
+       for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+               if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+                       break;  /* No more callbacks. */
+               rsclp->tails[j] = rsclp->tails[i];
+               rsclp->gp_seq[j] = rsclp->gp_seq[i];
+       }
+}
+
+/*
+ * "Accelerate" callbacks based on more-accurate grace-period information.
+ * The reason for this is that RCU does not synchronize the beginnings and
+ * ends of grace periods, and that callbacks are posted locally.  This in
+ * turn means that the callbacks must be labelled conservatively early
+ * on, as getting exact information would degrade both performance and
+ * scalability.  When more accurate grace-period information becomes
+ * available, previously posted callbacks can be "accelerated", marking
+ * them to complete at the end of the earlier grace period.
+ *
+ * This function operates on an rcu_segcblist structure, and also the
+ * grace-period sequence number at which new callbacks would become
+ * ready to invoke.
+ */
+static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
+                                           unsigned long seq)
+{
+       int i;
+
+       WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
+       WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
+
+       /*
+        * Find the segment preceding the oldest segment of callbacks
+        * whose ->gp_seq[] completion is at or after that passed in via
+        * "seq", skipping any empty segments.  This oldest segment, along
+        * with any later segments, can be merged in with any newly arrived
+        * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
+        * as their ->gp_seq[] grace-period completion sequence number.
+        */
+       for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
+               if (rsclp->tails[i] != rsclp->tails[i - 1] &&
+                   ULONG_CMP_LT(rsclp->gp_seq[i], seq))
+                       break;
+
+       /*
+        * If all the segments contain callbacks that correspond to
+        * earlier grace-period sequence numbers than "seq", leave.
+        * Assuming that the rcu_segcblist structure has enough
+        * segments in its arrays, this can only happen if some of
+        * the non-done segments contain callbacks that really are
+        * ready to invoke.  This situation will get straightened
+        * out by the next call to rcu_segcblist_advance().
+        *
+        * Also advance to the oldest segment of callbacks whose
+        * ->gp_seq[] completion is at or after that passed in via "seq",
+        * skipping any empty segments.
+        */
+       if (++i >= RCU_NEXT_TAIL)
+               return false;
+
+       /*
+        * Merge all later callbacks, including newly arrived callbacks,
+        * into the segment located by the for-loop above.  Assign "seq"
+        * as the ->gp_seq[] value in order to correctly handle the case
+        * where there were no pending callbacks in the rcu_segcblist
+        * structure other than in the RCU_NEXT_TAIL segment.
+        */
+       for (; i < RCU_NEXT_TAIL; i++) {
+               rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
+               rsclp->gp_seq[i] = seq;
+       }
+       return true;
+}
+
+/*
+ * Scan the specified rcu_segcblist structure for callbacks that need
+ * a grace period later than the one specified by "seq".  We don't look
+ * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
+ * have a grace-period sequence number.
+ */
+static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
+                                                 unsigned long seq)
+{
+       int i;
+
+       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+               if (rsclp->tails[i - 1] != rsclp->tails[i] &&
+                   ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
+                       return true;
+       return false;
+}
+
+/*
+ * Interim function to return rcu_segcblist head pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
+{
+       return rsclp->head;
+}
+
+/*
+ * Interim function to return rcu_segcblist tail pointer.  Longer term, the
+ * rcu_segcblist will be used more pervasively, removing the need for this
+ * function.
+ */
+static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
+{
+       WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
+       return rsclp->tails[RCU_NEXT_TAIL];
+}
+
+#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h

index 047ac8c28a4e2b9207e78ede1a3d15be2c263c68..ad154a7bc114f7c216a509c0498fc483d6f64c4d 100644 (file)
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -22,7 +22,7 @@
   *        Lai Jiangshan <laijs@cn.fujitsu.com>
   *
   * For detailed explanation of Read-Copy Update mechanism see -
- *             Documentation/RCU/ *.txt
+ *             Documentation/RCU/ *.txt
   *
   */
  
@@ -32,31 +32,20 @@
  #include <linux/mutex.h>
  #include <linux/rcupdate.h>
  #include <linux/workqueue.h>
+#include <linux/rcu_segcblist.h>
  
  struct srcu_array {
         unsigned long lock_count[2];
         unsigned long unlock_count[2];
  };
  
-struct rcu_batch {
-       struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
  struct srcu_struct {
         unsigned long completed;
         unsigned long srcu_gp_seq;
         struct srcu_array __percpu *per_cpu_ref;
-       spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+       spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */
         int srcu_state;
-       /* callbacks just queued */
-       struct rcu_batch batch_queue;
-       /* callbacks try to do the first check_zero */
-       struct rcu_batch batch_check0;
-       /* callbacks done with the first check_zero and the flip */
-       struct rcu_batch batch_check1;
-       struct rcu_batch batch_done;
+       struct rcu_segcblist srcu_cblist;
         struct delayed_work work;
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
         struct lockdep_map dep_map;
@@ -97,10 +86,7 @@ void process_srcu(struct work_struct *work);
                 .per_cpu_ref = &name##_srcu_array,                      \
                 .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),    \
                 .srcu_state = SRCU_STATE_IDLE,                          \
-               .batch_queue = RCU_BATCH_INIT(name.batch_queue),        \
-               .batch_check0 = RCU_BATCH_INIT(name.batch_check0),      \
-               .batch_check1 = RCU_BATCH_INIT(name.batch_check1),      \
-               .batch_done = RCU_BATCH_INIT(name.batch_done),          \
+               .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\
                 .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
                 __SRCU_DEP_MAP_INIT(name)                               \
         }
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h

index 0bc1313c49e2d6ec975675c3f7dcee19c8ebe3c9..a943b42a9cf796f60e94ae59dbf2070ddd0210d7 100644 (file)
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -87,6 +87,12 @@ static inline unsigned long rcu_seq_snap(unsigned long *sp)
         return s;
  }
  
+/* Return the current value of the update-side sequence number, no ordering. */
+static inline unsigned long rcu_seq_current(unsigned long *sp)
+{
+       return READ_ONCE(*sp);
+}
+
  /*
   * Given a snapshot from rcu_seq_snap(), determine whether or not a
   * full update-side operation has occurred.
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h

deleted file mode 100644 (file)

index 982e3e0..0000000
--- a/kernel/rcu/rcu_segcblist.h
+++ /dev/null
@@ -1,670 +0,0 @@
-/*
- * RCU segmented callback lists
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright IBM Corporation, 2017
- *
- * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- */
-
-#ifndef __KERNEL_RCU_SEGCBLIST_H
-#define __KERNEL_RCU_SEGCBLIST_H
-
-/* Simple unsegmented callback lists. */
-struct rcu_cblist {
-       struct rcu_head *head;
-       struct rcu_head **tail;
-       long len;
-       long len_lazy;
-};
-
-#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
-
-/* Initialize simple callback list. */
-static inline void rcu_cblist_init(struct rcu_cblist *rclp)
-{
-       rclp->head = NULL;
-       rclp->tail = &rclp->head;
-       rclp->len = 0;
-       rclp->len_lazy = 0;
-}
-
-/* Is simple callback list empty? */
-static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
-{
-       return !rclp->head;
-}
-
-/* Return number of callbacks in simple callback list. */
-static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
-{
-       return rclp->len;
-}
-
-/* Return number of lazy callbacks in simple callback list. */
-static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
-{
-       return rclp->len_lazy;
-}
-
-/*
- * Debug function to actually count the number of callbacks.
- * If the number exceeds the limit specified, return -1.
- */
-static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
-{
-       int cnt = 0;
-       struct rcu_head **rhpp = &rclp->head;
-
-       for (;;) {
-               if (!*rhpp)
-                       return cnt;
-               if (++cnt > lim)
-                       return -1;
-               rhpp = &(*rhpp)->next;
-       }
-}
-
-/*
- * Dequeue the oldest rcu_head structure from the specified callback
- * list.  This function assumes that the callback is non-lazy, but
- * the caller can later invoke rcu_cblist_dequeued_lazy() if it
- * finds otherwise (and if it cares about laziness).  This allows
- * different users to have different ways of determining laziness.
- */
-static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
-{
-       struct rcu_head *rhp;
-
-       rhp = rclp->head;
-       if (!rhp)
-               return NULL;
-       prefetch(rhp);
-       rclp->len--;
-       rclp->head = rhp->next;
-       if (!rclp->head)
-               rclp->tail = &rclp->head;
-       return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
-{
-       rclp->len_lazy--;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
-{
-       return rclp->head;
-}
-
-/*
- * Interim function to return rcu_cblist head pointer.  Longer term, the
- * rcu_cblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
-{
-       WARN_ON_ONCE(rcu_cblist_empty(rclp));
-       return rclp->tail;
-}
-
-/* Complicated segmented callback lists.  ;-) */
-
-/*
- * Index values for segments in rcu_segcblist structure.
- *
- * The segments are as follows:
- *
- * [head, *tails[RCU_DONE_TAIL]):
- *     Callbacks whose grace period has elapsed, and thus can be invoked.
- * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
- *     Callbacks waiting for the current GP from the current CPU's viewpoint.
- * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
- *     Callbacks that arrived before the next GP started, again from
- *     the current CPU's viewpoint.  These can be handled by the next GP.
- * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
- *     Callbacks that might have arrived after the next GP started.
- *     There is some uncertainty as to when a given GP starts and
- *     ends, but a CPU knows the exact times if it is the one starting
- *     or ending the GP.  Other CPUs know that the previous GP ends
- *     before the next one starts.
- *
- * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
- * empty.
- *
- * The ->gp_seq[] array contains the grace-period number at which the
- * corresponding segment of callbacks will be ready to invoke.  A given
- * element of this array is meaningful only when the corresponding segment
- * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
- * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
- * not yet been assigned a grace-period number).
- */
-#define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
-#define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
-#define RCU_NEXT_READY_TAIL    2       /* Also RCU_NEXT head. */
-#define RCU_NEXT_TAIL          3
-#define RCU_CBLIST_NSEGS       4
-
-struct rcu_segcblist {
-       struct rcu_head *head;
-       struct rcu_head **tails[RCU_CBLIST_NSEGS];
-       unsigned long gp_seq[RCU_CBLIST_NSEGS];
-       long len;
-       long len_lazy;
-};
-
-/*
- * Initialize an rcu_segcblist structure.
- */
-static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
-{
-       int i;
-
-       BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
-       BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
-       rsclp->head = NULL;
-       for (i = 0; i < RCU_CBLIST_NSEGS; i++)
-               rsclp->tails[i] = &rsclp->head;
-       rsclp->len = 0;
-       rsclp->len_lazy = 0;
-}
-
-/*
- * Is the specified rcu_segcblist structure empty?
- *
- * But careful!  The fact that the ->head field is NULL does not
- * necessarily imply that there are no callbacks associated with
- * this structure.  When callbacks are being invoked, they are
- * removed as a group.  If callback invocation must be preempted,
- * the remaining callbacks will be added back to the list.  Either
- * way, the counts are updated later.
- *
- * So it is often the case that rcu_segcblist_n_cbs() should be used
- * instead.
- */
-static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
-{
-       return !rsclp->head;
-}
-
-/* Return number of callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
-{
-       return READ_ONCE(rsclp->len);
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
-{
-       return rsclp->len_lazy;
-}
-
-/* Return number of lazy callbacks in segmented callback list. */
-static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
-{
-       return rsclp->len - rsclp->len_lazy;
-}
-
-/*
- * Is the specified rcu_segcblist enabled, for example, not corresponding
- * to an offline or callback-offloaded CPU?
- */
-static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
-{
-       return !!rsclp->tails[RCU_NEXT_TAIL];
-}
-
-/*
- * Disable the specified rcu_segcblist structure, so that callbacks can
- * no longer be posted to it.  This structure must be empty.
- */
-static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
-{
-       WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
-       WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
-       WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
-       rsclp->tails[RCU_NEXT_TAIL] = NULL;
-}
-
-/*
- * Is the specified segment of the specified rcu_segcblist structure
- * empty of callbacks?
- */
-static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
-{
-       if (seg == RCU_DONE_TAIL)
-               return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
-       return rsclp->tails[seg - 1] == rsclp->tails[seg];
-}
-
-/*
- * Are all segments following the specified segment of the specified
- * rcu_segcblist structure empty of callbacks?  (The specified
- * segment might well contain callbacks.)
- */
-static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
-{
-       return !*rsclp->tails[seg];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are ready to be invoked?
- */
-static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
-{
-       return rcu_segcblist_is_enabled(rsclp) &&
-              &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * are still pending, that is, not yet ready to be invoked?
- */
-static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
-{
-       return rcu_segcblist_is_enabled(rsclp) &&
-              !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
-}
-
-/*
- * Dequeue and return the first ready-to-invoke callback.  If there
- * are no ready-to-invoke callbacks, return NULL.  Disables interrupts
- * to avoid interference.  Does not protect from interference from other
- * CPUs or tasks.
- */
-static inline struct rcu_head *
-rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
-{
-       unsigned long flags;
-       int i;
-       struct rcu_head *rhp;
-
-       local_irq_save(flags);
-       if (!rcu_segcblist_ready_cbs(rsclp)) {
-               local_irq_restore(flags);
-               return NULL;
-       }
-       rhp = rsclp->head;
-       BUG_ON(!rhp);
-       rsclp->head = rhp->next;
-       for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
-               if (rsclp->tails[i] != &rhp->next)
-                       break;
-               rsclp->tails[i] = &rsclp->head;
-       }
-       smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
-       WRITE_ONCE(rsclp->len, rsclp->len - 1);
-       local_irq_restore(flags);
-       return rhp;
-}
-
-/*
- * Account for the fact that a previously dequeued callback turned out
- * to be marked as lazy.
- */
-static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       rsclp->len_lazy--;
-       local_irq_restore(flags);
-}
-
-/*
- * Return a pointer to the first callback in the specified rcu_segcblist
- * structure.  This is useful for diagnostics.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
-{
-       if (rcu_segcblist_is_enabled(rsclp))
-               return rsclp->head;
-       return NULL;
-}
-
-/*
- * Return a pointer to the first pending callback in the specified
- * rcu_segcblist structure.  This is useful just after posting a given
- * callback -- if that callback is the first pending callback, then
- * you cannot rely on someone else having already started up the required
- * grace period.
- */
-static inline struct rcu_head *
-rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
-{
-       if (rcu_segcblist_is_enabled(rsclp))
-               return *rsclp->tails[RCU_DONE_TAIL];
-       return NULL;
-}
-
-/*
- * Does the specified rcu_segcblist structure contain callbacks that
- * have not yet been processed beyond having been posted, that is,
- * does it contain callbacks in its last segment?
- */
-static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
-{
-       return rcu_segcblist_is_enabled(rsclp) &&
-              !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
-}
-
-/*
- * Enqueue the specified callback onto the specified rcu_segcblist
- * structure, updating accounting as needed.  Note that the ->len
- * field may be accessed locklessly, hence the WRITE_ONCE().
- * The ->len field is used by rcu_barrier() and friends to determine
- * if it must post a callback on this structure, and it is OK
- * for rcu_barrier() to sometimes post callbacks needlessly, but
- * absolutely not OK for it to ever miss posting a callback.
- */
-static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
-                                        struct rcu_head *rhp, bool lazy)
-{
-       WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
-       if (lazy)
-               rsclp->len_lazy++;
-       smp_mb(); /* Ensure counts are updated before callback is enqueued. */
-       rhp->next = NULL;
-       *rsclp->tails[RCU_NEXT_TAIL] = rhp;
-       rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
-}
-
-/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure.  This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks.  (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks.  Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
-                                              struct rcu_cblist *rclp)
-{
-       rclp->len_lazy += rsclp->len_lazy;
-       rclp->len += rsclp->len;
-       rsclp->len_lazy = 0;
-       WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
-}
-
-/*
- * Extract only those callbacks ready to be invoked from the specified
- * rcu_segcblist structure and place them in the specified rcu_cblist
- * structure.
- */
-static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
-                                                 struct rcu_cblist *rclp)
-{
-       int i;
-
-       if (!rcu_segcblist_ready_cbs(rsclp))
-               return; /* Nothing to do. */
-       *rclp->tail = rsclp->head;
-       rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
-       *rsclp->tails[RCU_DONE_TAIL] = NULL;
-       rclp->tail = rsclp->tails[RCU_DONE_TAIL];
-       for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
-               if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
-                       rsclp->tails[i] = &rsclp->head;
-}
-
-/*
- * Extract only those callbacks still pending (not yet ready to be
- * invoked) from the specified rcu_segcblist structure and place them in
- * the specified rcu_cblist structure.  Note that this loses information
- * about any callbacks that might have been partway done waiting for
- * their grace period.  Too bad!  They will have to start over.
- */
-static inline void
-rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
-                              struct rcu_cblist *rclp)
-{
-       int i;
-
-       if (!rcu_segcblist_pend_cbs(rsclp))
-               return; /* Nothing to do. */
-       *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
-       rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
-       *rsclp->tails[RCU_DONE_TAIL] = NULL;
-       for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
-               rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
-}
-
-/*
- * Move the entire contents of the specified rcu_segcblist structure,
- * counts, callbacks, and all, to the specified rcu_cblist structure.
- * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists?
- * @@@ Memory barrier needed?  (Not if only used at boot time...)
- */
-static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
-                                            struct rcu_cblist *rclp)
-{
-       rcu_segcblist_extract_done_cbs(rsclp, rclp);
-       rcu_segcblist_extract_pend_cbs(rsclp, rclp);
-       rcu_segcblist_extract_count(rsclp, rclp);
-}
-
-/*
- * Insert counts from the specified rcu_cblist structure in the
- * specified rcu_segcblist structure.
- */
-static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
-                                             struct rcu_cblist *rclp)
-{
-       rsclp->len_lazy += rclp->len_lazy;
-       /* ->len sampled locklessly. */
-       WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
-       rclp->len_lazy = 0;
-       rclp->len = 0;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the beginning of the
- * done-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
-                                                struct rcu_cblist *rclp)
-{
-       int i;
-
-       if (!rclp->head)
-               return; /* No callbacks to move. */
-       *rclp->tail = rsclp->head;
-       rsclp->head = rclp->head;
-       for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
-               if (&rsclp->head == rsclp->tails[i])
-                       rsclp->tails[i] = rclp->tail;
-               else
-                       break;
-       rclp->head = NULL;
-       rclp->tail = &rclp->head;
-}
-
-/*
- * Move callbacks from the specified rcu_cblist to the end of the
- * new-callbacks segment of the specified rcu_segcblist.
- */
-static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
-                                                struct rcu_cblist *rclp)
-{
-       if (!rclp->head)
-               return; /* Nothing to do. */
-       *rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
-       rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
-       rclp->head = NULL;
-       rclp->tail = &rclp->head;
-}
-
-/*
- * Advance the callbacks in the specified rcu_segcblist structure based
- * on the current value passed in for the grace-period counter.
- */
-static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
-                                        unsigned long seq)
-{
-       int i, j;
-
-       WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-       WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
-       /*
-        * Find all callbacks whose ->gp_seq numbers indicate that they
-        * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
-        */
-       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-               if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-                       break;
-               rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
-       }
-
-       /* If no callbacks moved, nothing more need be done. */
-       if (i == RCU_WAIT_TAIL)
-               return;
-
-       /* Clean up tail pointers that might have been misordered above. */
-       for (j = RCU_WAIT_TAIL; j < i; j++)
-               rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
-
-       /*
-        * Callbacks moved, so clean up the misordered ->tails[] pointers
-        * that now point into the middle of the list of ready-to-invoke
-        * callbacks.  The overall effect is to copy down the later pointers
-        * into the gap that was created by the now-ready segments.
-        */
-       for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-               if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
-                       break;  /* No more callbacks. */
-               rsclp->tails[j] = rsclp->tails[i];
-               rsclp->gp_seq[j] = rsclp->gp_seq[i];
-       }
-}
-
-/*
- * "Accelerate" callbacks based on more-accurate grace-period information.
- * The reason for this is that RCU does not synchronize the beginnings and
- * ends of grace periods, and that callbacks are posted locally.  This in
- * turn means that the callbacks must be labelled conservatively early
- * on, as getting exact information would degrade both performance and
- * scalability.  When more accurate grace-period information becomes
- * available, previously posted callbacks can be "accelerated", marking
- * them to complete at the end of the earlier grace period.
- *
- * This function operates on an rcu_segcblist structure, and also the
- * grace-period sequence number at which new callbacks would become
- * ready to invoke.
- */
-static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
-                                           unsigned long seq)
-{
-       int i;
-
-       WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
-       WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
-
-       /*
-        * Find the segment preceding the oldest segment of callbacks
-        * whose ->gp_seq[] completion is at or after that passed in via
-        * "seq", skipping any empty segments.  This oldest segment, along
-        * with any later segments, can be merged in with any newly arrived
-        * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
-        * as their ->gp_seq[] grace-period completion sequence number.
-        */
-       for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
-               if (rsclp->tails[i] != rsclp->tails[i - 1] &&
-                   ULONG_CMP_LT(rsclp->gp_seq[i], seq))
-                       break;
-
-       /*
-        * If all the segments contain callbacks that correspond to
-        * earlier grace-period sequence numbers than "seq", leave.
-        * Assuming that the rcu_segcblist structure has enough
-        * segments in its arrays, this can only happen if some of
-        * the non-done segments contain callbacks that really are
-        * ready to invoke.  This situation will get straightened
-        * out by the next call to rcu_segcblist_advance().
-        *
-        * Also advance to the oldest segment of callbacks whose
-        * ->gp_seq[] completion is at or after that passed in via "seq",
-        * skipping any empty segments.
-        */
-       if (++i >= RCU_NEXT_TAIL)
-               return false;
-
-       /*
-        * Merge all later callbacks, including newly arrived callbacks,
-        * into the segment located by the for-loop above.  Assign "seq"
-        * as the ->gp_seq[] value in order to correctly handle the case
-        * where there were no pending callbacks in the rcu_segcblist
-        * structure other than in the RCU_NEXT_TAIL segment.
-        */
-       for (; i < RCU_NEXT_TAIL; i++) {
-               rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
-               rsclp->gp_seq[i] = seq;
-       }
-       return true;
-}
-
-/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq".  We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-                                                 unsigned long seq)
-{
-       int i;
-
-       for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-               if (rsclp->tails[i - 1] != rsclp->tails[i] &&
-                   ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-                       return true;
-       return false;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
-{
-       return rsclp->head;
-}
-
-/*
- * Interim function to return rcu_segcblist head pointer.  Longer term, the
- * rcu_segcblist will be used more pervasively, removing the need for this
- * function.
- */
-static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
-{
-       WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
-       return rsclp->tails[RCU_NEXT_TAIL];
-}
-
-#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c

index d464986d82b6931c18ef172835d8db1ef1a90064..56fd30862122557bcea5f72d627e8399853b47c5 100644 (file)
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -22,7 +22,7 @@
   *        Lai Jiangshan <laijs@cn.fujitsu.com>
   *
   * For detailed explanation of Read-Copy Update mechanism see -
- *             Documentation/RCU/ *.txt
+ *             Documentation/RCU/ *.txt
   *
   */
  
@@ -38,85 +38,13 @@
  
  #include "rcu.h"
  
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-       b->head = NULL;
-       b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-       *b->tail = head;
-       b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-       return b->tail == &b->head;
-}
-
-/*
- * Are all batches empty for the specified srcu_struct?
- */
-static inline bool rcu_all_batches_empty(struct srcu_struct *sp)
-{
-       return rcu_batch_empty(&sp->batch_done) &&
-              rcu_batch_empty(&sp->batch_check1) &&
-              rcu_batch_empty(&sp->batch_check0) &&
-              rcu_batch_empty(&sp->batch_queue);
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-       struct rcu_head *head;
-
-       if (rcu_batch_empty(b))
-               return NULL;
-
-       head = b->head;
-       b->head = head->next;
-       if (b->tail == &head->next)
-               rcu_batch_init(b);
-
-       return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-       if (!rcu_batch_empty(from)) {
-               *to->tail = from->head;
-               to->tail = from->tail;
-               rcu_batch_init(from);
-       }
-}
-
  static int init_srcu_struct_fields(struct srcu_struct *sp)
  {
         sp->completed = 0;
         sp->srcu_gp_seq = 0;
         spin_lock_init(&sp->queue_lock);
         sp->srcu_state = SRCU_STATE_IDLE;
-       rcu_batch_init(&sp->batch_queue);
-       rcu_batch_init(&sp->batch_check0);
-       rcu_batch_init(&sp->batch_check1);
-       rcu_batch_init(&sp->batch_done);
+       rcu_segcblist_init(&sp->srcu_cblist);
         INIT_DELAYED_WORK(&sp->work, process_srcu);
         sp->per_cpu_ref = alloc_percpu(struct srcu_array);
         return sp->per_cpu_ref ? 0 : -ENOMEM;
@@ -268,7 +196,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
  {
         if (WARN_ON(srcu_readers_active(sp)))
                 return; /* Leakage unless caller handles error. */
-       if (WARN_ON(!rcu_all_batches_empty(sp)))
+       if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)))
                 return; /* Leakage unless caller handles error. */
         flush_delayed_work(&sp->work);
         if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE))
@@ -324,6 +252,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
   */
  static void srcu_gp_start(struct srcu_struct *sp)
  {
+       rcu_segcblist_accelerate(&sp->srcu_cblist,
+                                rcu_seq_snap(&sp->srcu_gp_seq));
         WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1);
         rcu_seq_start(&sp->srcu_gp_seq);
  }
@@ -371,6 +301,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
  {
         rcu_seq_end(&sp->srcu_gp_seq);
         WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE);
+
+       spin_lock_irq(&sp->queue_lock);
+       rcu_segcblist_advance(&sp->srcu_cblist,
+                             rcu_seq_current(&sp->srcu_gp_seq));
+       spin_unlock_irq(&sp->queue_lock);
  }
  
  /*
@@ -409,7 +344,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
         head->func = func;
         spin_lock_irqsave(&sp->queue_lock, flags);
         smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-       rcu_batch_queue(&sp->batch_queue, head);
+       rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
         if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
                 srcu_gp_start(sp);
                 queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
@@ -445,13 +380,13 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
         smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
         if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) {
                 /* steal the processing owner */
+               rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
                 srcu_gp_start(sp);
-               rcu_batch_queue(&sp->batch_check0, head);
                 spin_unlock_irq(&sp->queue_lock);
                 /* give the processing owner to work_struct */
                 srcu_reschedule(sp, 0);
         } else {
-               rcu_batch_queue(&sp->batch_queue, head);
+               rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
                 spin_unlock_irq(&sp->queue_lock);
         }
  
@@ -548,19 +483,6 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed);
  #define SRCU_CALLBACK_BATCH    10
  #define SRCU_INTERVAL          1
  
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-       if (!rcu_batch_empty(&sp->batch_queue)) {
-               spin_lock_irq(&sp->queue_lock);
-               rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-               spin_unlock_irq(&sp->queue_lock);
-       }
-}
-
  /*
   * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
   * ->batch_check1 and then to ->batch_done as readers drain.
@@ -586,26 +508,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
                 idx = 1 ^ (sp->completed & 1);
                 if (!try_check_zero(sp, idx, trycount))
                         return; /* readers present, retry after SRCU_INTERVAL */
-
-               /*
-                * The callbacks in ->batch_check1 have already done
-                * with their first zero check and flip back when they were
-                * enqueued on ->batch_check0 in a previous invocation of
-                * srcu_advance_batches().  (Presumably try_check_zero()
-                * returned false during that invocation, leaving the
-                * callbacks stranded on ->batch_check1.) They are therefore
-                * ready to invoke, so move them to ->batch_done.
-                */
-               rcu_batch_move(&sp->batch_done, &sp->batch_check1);
                 srcu_flip(sp);
-
-               /*
-                * The callbacks in ->batch_check0 just finished their
-                * first check zero and flip, so move them to ->batch_check1
-                * for future checking on the other idx.
-                */
-               rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
                 WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2);
         }
  
@@ -619,14 +522,6 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
                 trycount = trycount < 2 ? 2 : trycount;
                 if (!try_check_zero(sp, idx, trycount))
                         return; /* readers present, retry after SRCU_INTERVAL */
-
-               /*
-                * The callbacks in ->batch_check1 have now waited for
-                * all pre-existing readers using both idx values.  They are
-                * therefore ready to invoke, so move them to ->batch_done.
-                */
-               rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
                 srcu_gp_end(sp);
         }
  }
@@ -639,17 +534,26 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
   */
  static void srcu_invoke_callbacks(struct srcu_struct *sp)
  {
-       int i;
-       struct rcu_head *head;
+       struct rcu_cblist ready_cbs;
+       struct rcu_head *rhp;
  
-       for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-               head = rcu_batch_dequeue(&sp->batch_done);
-               if (!head)
-                       break;
+       spin_lock_irq(&sp->queue_lock);
+       if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
+               spin_unlock_irq(&sp->queue_lock);
+               return;
+       }
+       rcu_cblist_init(&ready_cbs);
+       rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
+       spin_unlock_irq(&sp->queue_lock);
+       rhp = rcu_cblist_dequeue(&ready_cbs);
+       for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
                 local_bh_disable();
-               head->func(head);
+               rhp->func(rhp);
                 local_bh_enable();
         }
+       spin_lock_irq(&sp->queue_lock);
+       rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
+       spin_unlock_irq(&sp->queue_lock);
  }
  
  /*
@@ -660,9 +564,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
  {
         bool pending = true;
  
-       if (rcu_all_batches_empty(sp)) {
+       if (rcu_segcblist_empty(&sp->srcu_cblist)) {
                 spin_lock_irq(&sp->queue_lock);
-               if (rcu_all_batches_empty(sp) &&
+               if (rcu_segcblist_empty(&sp->srcu_cblist) &&
                     READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) {
                         WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE);
                         pending = false;
@@ -683,7 +587,6 @@ void process_srcu(struct work_struct *work)
  
         sp = container_of(work, struct srcu_struct, work.work);
  
-       srcu_collect_new(sp);
         srcu_advance_batches(sp, 1);
         srcu_invoke_callbacks(sp);
         srcu_reschedule(sp, SRCU_INTERVAL);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h

index 93889ff21dbb9d216e17d5fb10bfa1b5a9efd835..4f62651588ea64ca7f0c67db25414c00352c2820 100644 (file)
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,7 +30,7 @@
  #include <linux/seqlock.h>
  #include <linux/swait.h>
  #include <linux/stop_machine.h>
-#include "rcu_segcblist.h"
+#include <linux/rcu_segcblist.h>
  
  /*
   * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	Mon, 13 Mar 2017 23:48:18 +0000 (16:48 -0700)
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	Tue, 18 Apr 2017 18:38:20 +0000 (11:38 -0700)
include/linux/rcu_segcblist.h	[new file with mode: 0644]	patch | blob
include/linux/srcu.h		patch | blob | blame | history
kernel/rcu/rcu.h		patch | blob | blame | history
kernel/rcu/rcu_segcblist.h	[deleted file]	patch | blob | blame | history
kernel/rcu/srcu.c		patch | blob | blame | history
kernel/rcu/tree.h		patch | blob | blame | history