netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag"
author David Howells <dhowells@redhat.com>
Tue, 30 Jul 2024 16:01:40 +0000 (17:01 +0100)
committer Christian Brauner <brauner@kernel.org>
Mon, 12 Aug 2024 20:03:27 +0000 (22:03 +0200)
This reverts commit ae678317b95e760607c7b20b97c9cd4ca9ed6e1a.

Revert the patch that removes the deprecated use of PG_private_2 in
netfslib for the moment as Ceph is actually still using this to track
data copied to the cache.

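For context: the deprecated scheme sets PG_private_2 on a folio when a copy of
its data to the cache is started, clears it (waking any waiters) when that copy
completes, and makes writers wait on the bit before touching the folio again.
A minimal sketch of that lifecycle, for illustration only (not part of this
change; the example_* helpers are hypothetical and the header locations are
assumed), using the helpers restored by the diff below:

	/* Illustrative only: the PG_private_2 lifecycle that the restored
	 * code relies on.  Helper declarations are assumed to live in
	 * <linux/netfs.h> and <linux/pagemap.h>.
	 */
	#include <linux/netfs.h>
	#include <linux/pagemap.h>

	/* Cache side: mark a copy-to-cache as in flight on this folio. */
	static void example_start_copy_to_cache(struct folio *folio)
	{
		folio_start_private_2(folio);	/* [DEPRECATED] */
	}

	/* Cache side: the copy finished; clear the bit and wake waiters. */
	static void example_end_copy_to_cache(struct folio *folio)
	{
		folio_end_private_2(folio);	/* [DEPRECATED] */
	}

	/* Writer side: wait for any in-flight copy before reusing the folio. */
	static void example_wait_for_copy(struct folio *folio)
	{
		folio_wait_private_2(folio);	/* [DEPRECATED] */
	}
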
Fixes: ae678317b95e ("netfs: Remove deprecated use of PG_private_2 as a second writeback flag")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/ceph/addr.c
fs/netfs/buffered_read.c
fs/netfs/io.c
include/trace/events/netfs.h

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef56cb8d0864d3b4b054466d57f250..73b5a07bf94deed7402885d3d4b1bfd36877afdd 100644
@@ -498,6 +498,11 @@ const struct netfs_request_ops ceph_netfs_ops = {
 };
 
 #ifdef CONFIG_CEPH_FSCACHE
+static void ceph_set_page_fscache(struct page *page)
+{
+       folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
+}
+
 static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
 {
        struct inode *inode = priv;
@@ -515,6 +520,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
                               ceph_fscache_write_terminated, inode, true, caching);
 }
 #else
+static inline void ceph_set_page_fscache(struct page *page)
+{
+}
+
 static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
 {
 }
@@ -706,6 +715,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                len = wlen;
 
        set_page_writeback(page);
+       if (caching)
+               ceph_set_page_fscache(page);
        ceph_fscache_write_to_cache(inode, page_off, len, caching);
 
        if (IS_ENCRYPTED(inode)) {
@@ -789,6 +800,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
                return AOP_WRITEPAGE_ACTIVATE;
        }
 
+       folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
+
        err = writepage_nounlock(page, wbc);
        if (err == -ERESTARTSYS) {
                /* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1062,7 +1075,8 @@ get_more_pages:
                                unlock_page(page);
                                break;
                        }
-                       if (PageWriteback(page)) {
+                       if (PageWriteback(page) ||
+                           PagePrivate2(page) /* [DEPRECATED] */) {
                                if (wbc->sync_mode == WB_SYNC_NONE) {
                                        doutc(cl, "%p under writeback\n", page);
                                        unlock_page(page);
@@ -1070,6 +1084,7 @@ get_more_pages:
                                }
                                doutc(cl, "waiting on writeback %p\n", page);
                                wait_on_page_writeback(page);
+                               folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
                        }
 
                        if (!clear_page_dirty_for_io(page)) {
@@ -1254,6 +1269,8 @@ new_request:
                        }
 
                        set_page_writeback(page);
+                       if (caching)
+                               ceph_set_page_fscache(page);
                        len += thp_size(page);
                }
                ceph_fscache_write_to_cache(inode, offset, len, caching);
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a688d4c75d9926864ae80d2d26a7e1ed6a34ac3d..424048f9ed1fdc664afbaef41c0816bb71abe0cd 100644
@@ -466,7 +466,7 @@ retry:
        if (!netfs_is_cache_enabled(ctx) &&
            netfs_skip_folio_read(folio, pos, len, false)) {
                netfs_stat(&netfs_n_rh_write_zskip);
-               goto have_folio;
+               goto have_folio_no_wait;
        }
 
        rreq = netfs_alloc_request(mapping, file,
@@ -507,6 +507,12 @@ retry:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
+       if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
+               ret = folio_wait_private_2_killable(folio);
+               if (ret < 0)
+                       goto error;
+       }
+have_folio_no_wait:
        *_folio = folio;
        _leave(" = 0");
        return 0;
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b9836889e87257cd058eec8926201738b2..c179a1c73fa703a5849975f44682222625f023c2 100644
@@ -98,6 +98,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
        netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete);
 }
 
+/*
+ * [DEPRECATED] Deal with the completion of writing the data to the cache.  We
+ * have to clear the PG_fscache bits on the folios involved and release the
+ * caller's ref.
+ *
+ * May be called in softirq mode and we inherit a ref from the caller.
+ */
+static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
+                                         bool was_async)
+{
+       struct netfs_io_subrequest *subreq;
+       struct folio *folio;
+       pgoff_t unlocked = 0;
+       bool have_unlocked = false;
+
+       rcu_read_lock();
+
+       list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+               XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
+
+               xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+                       if (xas_retry(&xas, folio))
+                               continue;
+
+                       /* We might have multiple writes from the same huge
+                        * folio, but we mustn't unlock a folio more than once.
+                        */
+                       if (have_unlocked && folio->index <= unlocked)
+                               continue;
+                       unlocked = folio_next_index(folio) - 1;
+                       trace_netfs_folio(folio, netfs_folio_trace_end_copy);
+                       folio_end_private_2(folio);
+                       have_unlocked = true;
+               }
+       }
+
+       rcu_read_unlock();
+       netfs_rreq_completed(rreq, was_async);
+}
+
+static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
+                                      bool was_async) /* [DEPRECATED] */
+{
+       struct netfs_io_subrequest *subreq = priv;
+       struct netfs_io_request *rreq = subreq->rreq;
+
+       if (IS_ERR_VALUE(transferred_or_error)) {
+               netfs_stat(&netfs_n_rh_write_failed);
+               trace_netfs_failure(rreq, subreq, transferred_or_error,
+                                   netfs_fail_copy_to_cache);
+       } else {
+               netfs_stat(&netfs_n_rh_write_done);
+       }
+
+       trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
+
+       /* If we decrement nr_copy_ops to 0, the ref belongs to us. */
+       if (atomic_dec_and_test(&rreq->nr_copy_ops))
+               netfs_rreq_unmark_after_write(rreq, was_async);
+
+       netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+}
+
+/*
+ * [DEPRECATED] Perform any outstanding writes to the cache.  We inherit a ref
+ * from the caller.
+ */
+static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
+{
+       struct netfs_cache_resources *cres = &rreq->cache_resources;
+       struct netfs_io_subrequest *subreq, *next, *p;
+       struct iov_iter iter;
+       int ret;
+
+       trace_netfs_rreq(rreq, netfs_rreq_trace_copy);
+
+       /* We don't want terminating writes trying to wake us up whilst we're
+        * still going through the list.
+        */
+       atomic_inc(&rreq->nr_copy_ops);
+
+       list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
+               if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+                       list_del_init(&subreq->rreq_link);
+                       netfs_put_subrequest(subreq, false,
+                                            netfs_sreq_trace_put_no_copy);
+               }
+       }
+
+       list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+               /* Amalgamate adjacent writes */
+               while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+                       next = list_next_entry(subreq, rreq_link);
+                       if (next->start != subreq->start + subreq->len)
+                               break;
+                       subreq->len += next->len;
+                       list_del_init(&next->rreq_link);
+                       netfs_put_subrequest(next, false,
+                                            netfs_sreq_trace_put_merged);
+               }
+
+               ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
+                                              subreq->len, rreq->i_size, true);
+               if (ret < 0) {
+                       trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
+                       trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
+                       continue;
+               }
+
+               iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
+                               subreq->start, subreq->len);
+
+               atomic_inc(&rreq->nr_copy_ops);
+               netfs_stat(&netfs_n_rh_write);
+               netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
+               trace_netfs_sreq(subreq, netfs_sreq_trace_write);
+               cres->ops->write(cres, subreq->start, &iter,
+                                netfs_rreq_copy_terminated, subreq);
+       }
+
+       /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
+       if (atomic_dec_and_test(&rreq->nr_copy_ops))
+               netfs_rreq_unmark_after_write(rreq, false);
+}
+
+static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */
+{
+       struct netfs_io_request *rreq =
+               container_of(work, struct netfs_io_request, work);
+
+       netfs_rreq_do_write_to_cache(rreq);
+}
+
+static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */
+{
+       rreq->work.func = netfs_rreq_write_to_cache_work;
+       if (!queue_work(system_unbound_wq, &rreq->work))
+               BUG();
+}
+
 /*
  * Handle a short read.
  */
@@ -275,6 +415,10 @@ again:
        clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
        wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
 
+       if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) &&
+           test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))
+               return netfs_rreq_write_to_cache(rreq);
+
        netfs_rreq_completed(rreq, was_async);
 }
 
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index da23484268dfcfd6deeee9b9c40447b034d775fc..24ec3434d32ee9fe20896fd705e360e17bd34e0d 100644
        EM(netfs_folio_trace_clear_g,           "clear-g")      \
        EM(netfs_folio_trace_clear_s,           "clear-s")      \
        EM(netfs_folio_trace_copy_to_cache,     "mark-copy")    \
+       EM(netfs_folio_trace_end_copy,          "end-copy")     \
        EM(netfs_folio_trace_filled_gaps,       "filled-gaps")  \
        EM(netfs_folio_trace_kill,              "kill")         \
        EM(netfs_folio_trace_kill_cc,           "kill-cc")      \