Merge tag 'pm-6.16-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
[linux-2.6-block.git] / fs / jbd2 / commit.c
CommitLineData
f5166768 1// SPDX-License-Identifier: GPL-2.0+
470decc6 2/*
f7f4bccb 3 * linux/fs/jbd2/commit.c
470decc6
DK
4 *
5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
6 *
7 * Copyright 1998 Red Hat corp --- All Rights Reserved
8 *
470decc6
DK
9 * Journal commit routines for the generic filesystem journaling code;
10 * part of the ext2fs journaling system.
11 */
12
13#include <linux/time.h>
14#include <linux/fs.h>
f7f4bccb 15#include <linux/jbd2.h>
470decc6
DK
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/mm.h>
19#include <linux/pagemap.h>
8e85fb3f 20#include <linux/jiffies.h>
818d276c 21#include <linux/crc32.h>
cd1aac32
AK
22#include <linux/writeback.h>
23#include <linux/backing-dev.h>
fd98496f 24#include <linux/bio.h>
0e3d2a63 25#include <linux/blkdev.h>
39e3ac25 26#include <linux/bitops.h>
879c5e6b 27#include <trace/events/jbd2.h>
470decc6
DK
28
29/*
b34090e5 30 * IO end handler for temporary buffer_heads handling writes to the journal.
470decc6
DK
31 */
32static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
33{
b34090e5
JK
34 struct buffer_head *orig_bh = bh->b_private;
35
470decc6
DK
36 BUFFER_TRACE(bh, "");
37 if (uptodate)
38 set_buffer_uptodate(bh);
39 else
40 clear_buffer_uptodate(bh);
b34090e5
JK
41 if (orig_bh) {
42 clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
4e857c58 43 smp_mb__after_atomic();
b34090e5
JK
44 wake_up_bit(&orig_bh->b_state, BH_Shadow);
45 }
470decc6
DK
46 unlock_buffer(bh);
47}
48
49/*
87c89c23
JK
50 * When an ext4 file is truncated, it is possible that some pages are not
51 * successfully freed, because they are attached to a committing transaction.
470decc6
DK
52 * After the transaction commits, these pages are left on the LRU, with no
53 * ->mapping, and with attached buffers. These pages are trivially reclaimable
54 * by the VM, but their apparent absence upsets the VM accounting, and it makes
55 * the numbers in /proc/meminfo look odd.
56 *
57 * So here, we have a buffer which has just come off the forget list. Look to
58 * see if we can strip all buffers from the backing page.
59 *
fd3b3d7f
KS
60 * Called under j_list_lock. The caller provided us with a ref against the
61 * buffer, and we drop that here.
470decc6
DK
62 */
63static void release_buffer_page(struct buffer_head *bh)
64{
73122255 65 struct folio *folio;
470decc6
DK
66
67 if (buffer_dirty(bh))
68 goto nope;
69 if (atomic_read(&bh->b_count) != 1)
70 goto nope;
0d22fe2f 71 folio = bh->b_folio;
73122255 72 if (folio->mapping)
470decc6
DK
73 goto nope;
74
75 /* OK, it's a truncated page */
73122255 76 if (!folio_trylock(folio))
470decc6
DK
77 goto nope;
78
73122255 79 folio_get(folio);
470decc6 80 __brelse(bh);
68189fef 81 try_to_free_buffers(folio);
73122255
MWO
82 folio_unlock(folio);
83 folio_put(folio);
470decc6
DK
84 return;
85
86nope:
87 __brelse(bh);
88}
89
e5a120ae 90static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
1f56c589
DW
91{
92 struct commit_header *h;
93 __u32 csum;
94
db9ee220 95 if (!jbd2_journal_has_csum_v2or3(j))
1f56c589
DW
96 return;
97
e5a120ae 98 h = (struct commit_header *)(bh->b_data);
1f56c589
DW
99 h->h_chksum_type = 0;
100 h->h_chksum_size = 0;
101 h->h_chksum[0] = 0;
76005718 102 csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize);
1f56c589
DW
103 h->h_chksum[0] = cpu_to_be32(csum);
104}
105
818d276c
GS
106/*
107 * Done it all: now submit the commit record. We should have
470decc6
DK
108 * cleaned up our previous buffers by now, so if we are in abort
109 * mode we can now just skip the rest of the journal write
110 * entirely.
111 *
112 * Returns 1 if the journal needs to be aborted or 0 on success
113 */
818d276c
GS
114static int journal_submit_commit_record(journal_t *journal,
115 transaction_t *commit_transaction,
116 struct buffer_head **cbh,
117 __u32 crc32_sum)
470decc6 118{
818d276c 119 struct commit_header *tmp;
470decc6 120 struct buffer_head *bh;
b42d1d6b 121 struct timespec64 now;
6a3afb6a 122 blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
470decc6 123
6cba611e
ZH
124 *cbh = NULL;
125
470decc6
DK
126 if (is_journal_aborted(journal))
127 return 0;
128
32ab6715
JK
129 bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
130 JBD2_COMMIT_BLOCK);
e5a120ae 131 if (!bh)
470decc6
DK
132 return 1;
133
818d276c 134 tmp = (struct commit_header *)bh->b_data;
b42d1d6b 135 ktime_get_coarse_real_ts64(&now);
736603ab
TT
136 tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
137 tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
818d276c 138
56316a0d 139 if (jbd2_has_feature_checksum(journal)) {
818d276c
GS
140 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
141 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
142 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
470decc6 143 }
e5a120ae 144 jbd2_commit_block_csum_set(journal, bh);
470decc6 145
e5a120ae 146 BUFFER_TRACE(bh, "submit commit block");
818d276c 147 lock_buffer(bh);
45a90bfd 148 clear_buffer_dirty(bh);
818d276c
GS
149 set_buffer_uptodate(bh);
150 bh->b_end_io = journal_end_buffer_io_sync;
151
152 if (journal->j_flags & JBD2_BARRIER &&
56316a0d 153 !jbd2_has_feature_async_commit(journal))
f3ed5df3 154 write_flags |= REQ_PREFLUSH | REQ_FUA;
9c35575b 155
f3ed5df3 156 submit_bh(write_flags, bh);
818d276c 157 *cbh = bh;
f3ed5df3 158 return 0;
818d276c
GS
159}
160
161/*
162 * This function along with journal_submit_commit_record
163 * allows to write the commit record asynchronously.
164 */
fd98496f
TT
165static int journal_wait_on_commit_record(journal_t *journal,
166 struct buffer_head *bh)
818d276c
GS
167{
168 int ret = 0;
169
170 clear_buffer_dirty(bh);
171 wait_on_buffer(bh);
470decc6 172
818d276c
GS
173 if (unlikely(!buffer_uptodate(bh)))
174 ret = -EIO;
175 put_bh(bh); /* One for getblk() */
818d276c
GS
176
177 return ret;
470decc6
DK
178}
179
ff780b91 180/* Send all the data buffers related to an inode */
f30ff35f 181int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
ff780b91 182{
ff780b91
HS
183 if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
184 return 0;
185
186 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
f30ff35f 187 return journal->j_submit_inode_data_buffers(jinode);
ff780b91
HS
188
189}
190EXPORT_SYMBOL(jbd2_submit_inode_data);
191
192int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
193{
194 if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
195 !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
196 return 0;
197 return filemap_fdatawait_range_keep_errors(
198 jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
199 jinode->i_dirty_end);
200}
201EXPORT_SYMBOL(jbd2_wait_inode_data);
202
c851ed54
JK
203/*
204 * Submit all the data buffers of inode associated with the transaction to
205 * disk.
206 *
207 * We are in a committing transaction. Therefore no new inode can be added to
208 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
209 * operate on from being released while we write out pages.
210 */
cd1aac32 211static int journal_submit_data_buffers(journal_t *journal,
c851ed54
JK
212 transaction_t *commit_transaction)
213{
214 struct jbd2_inode *jinode;
215 int err, ret = 0;
c851ed54
JK
216
217 spin_lock(&journal->j_list_lock);
218 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
41617e1a
JK
219 if (!(jinode->i_flags & JI_WRITE_DATA))
220 continue;
cb0d9d47 221 jinode->i_flags |= JI_COMMIT_RUNNING;
c851ed54 222 spin_unlock(&journal->j_list_lock);
342af94e 223 /* submit the inode data buffers. */
879c5e6b 224 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
342af94e
MFO
225 if (journal->j_submit_inode_data_buffers) {
226 err = journal->j_submit_inode_data_buffers(jinode);
227 if (!ret)
228 ret = err;
229 }
c851ed54
JK
230 spin_lock(&journal->j_list_lock);
231 J_ASSERT(jinode->i_transaction == commit_transaction);
cb0d9d47
JK
232 jinode->i_flags &= ~JI_COMMIT_RUNNING;
233 smp_mb();
c851ed54
JK
234 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
235 }
236 spin_unlock(&journal->j_list_lock);
237 return ret;
238}
239
aa3c0c61
MFO
240int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
241{
242 struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
243
244 return filemap_fdatawait_range_keep_errors(mapping,
245 jinode->i_dirty_start,
246 jinode->i_dirty_end);
247}
248
c851ed54
JK
249/*
250 * Wait for data submitted for writeout, refile inodes to proper
251 * transaction if needed.
252 *
253 */
254static int journal_finish_inode_data_buffers(journal_t *journal,
255 transaction_t *commit_transaction)
256{
257 struct jbd2_inode *jinode, *next_i;
258 int err, ret = 0;
259
cd1aac32 260 /* For locking, see the comment in journal_submit_data_buffers() */
c851ed54
JK
261 spin_lock(&journal->j_list_lock);
262 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
41617e1a
JK
263 if (!(jinode->i_flags & JI_WAIT_DATA))
264 continue;
cb0d9d47 265 jinode->i_flags |= JI_COMMIT_RUNNING;
c851ed54 266 spin_unlock(&journal->j_list_lock);
342af94e
MFO
267 /* wait for the inode data buffers writeout. */
268 if (journal->j_finish_inode_data_buffers) {
269 err = journal->j_finish_inode_data_buffers(jinode);
270 if (!ret)
271 ret = err;
272 }
6c02757c 273 cond_resched();
c851ed54 274 spin_lock(&journal->j_list_lock);
cb0d9d47
JK
275 jinode->i_flags &= ~JI_COMMIT_RUNNING;
276 smp_mb();
c851ed54
JK
277 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
278 }
279
280 /* Now refile inode to proper lists */
281 list_for_each_entry_safe(jinode, next_i,
282 &commit_transaction->t_inode_list, i_list) {
283 list_del(&jinode->i_list);
284 if (jinode->i_next_transaction) {
285 jinode->i_transaction = jinode->i_next_transaction;
286 jinode->i_next_transaction = NULL;
287 list_add(&jinode->i_list,
288 &jinode->i_transaction->t_inode_list);
289 } else {
290 jinode->i_transaction = NULL;
6ba0e7dc
RZ
291 jinode->i_dirty_start = 0;
292 jinode->i_dirty_end = 0;
c851ed54
JK
293 }
294 }
295 spin_unlock(&journal->j_list_lock);
296
297 return ret;
298}
299
818d276c
GS
300static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
301{
818d276c
GS
302 char *addr;
303 __u32 checksum;
304
147d4a09
RHI
305 addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
306 checksum = crc32_be(crc32_sum, addr, bh->b_size);
307 kunmap_local(addr);
818d276c
GS
308
309 return checksum;
310}
311
db9ee220 312static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
18eba7aa 313 unsigned long long block)
b517bea1
ZB
314{
315 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
56316a0d 316 if (jbd2_has_feature_64bit(j))
b517bea1
ZB
317 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
318}
319
c3900875
DW
320static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
321 struct buffer_head *bh, __u32 sequence)
322{
db9ee220 323 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
c3900875 324 __u8 *addr;
eee06c56 325 __u32 csum32;
18a6ea1e 326 __be32 seq;
c3900875 327
db9ee220 328 if (!jbd2_journal_has_csum_v2or3(j))
c3900875
DW
329 return;
330
18a6ea1e 331 seq = cpu_to_be32(sequence);
147d4a09 332 addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
76005718
EB
333 csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
334 csum32 = jbd2_chksum(csum32, addr, bh->b_size);
147d4a09 335 kunmap_local(addr);
c3900875 336
56316a0d 337 if (jbd2_has_feature_csum3(j))
db9ee220
DW
338 tag3->t_checksum = cpu_to_be32(csum32);
339 else
340 tag->t_checksum = cpu_to_be16(csum32);
c3900875 341}
470decc6 342/*
f7f4bccb 343 * jbd2_journal_commit_transaction
470decc6
DK
344 *
345 * The primary function for committing a transaction to the log. This
346 * function is called by the journal thread to begin a complete commit.
347 */
f7f4bccb 348void jbd2_journal_commit_transaction(journal_t *journal)
470decc6 349{
8e85fb3f 350 struct transaction_stats_s stats;
470decc6 351 transaction_t *commit_transaction;
e5a120ae
JK
352 struct journal_head *jh;
353 struct buffer_head *descriptor;
470decc6
DK
354 struct buffer_head **wbuf = journal->j_wbuf;
355 int bufs;
abe48a52 356 int escape;
470decc6 357 int err;
18eba7aa 358 unsigned long long blocknr;
e07f7183
JB
359 ktime_t start_time;
360 u64 commit_time;
470decc6 361 char *tagp = NULL;
470decc6
DK
362 journal_block_tag_t *tag = NULL;
363 int space_left = 0;
364 int first_tag = 0;
365 int tag_flag;
794446c6 366 int i;
b517bea1 367 int tag_bytes = journal_tag_bytes(journal);
818d276c
GS
368 struct buffer_head *cbh = NULL; /* For transactional checksums */
369 __u32 crc32_sum = ~0;
82f04ab4 370 struct blk_plug plug;
3339578f
JK
371 /* Tail of the journal */
372 unsigned long first_block;
373 tid_t first_tid;
374 int update_tail;
3caa487f 375 int csum_size = 0;
f5113eff 376 LIST_HEAD(io_bufs);
e5a120ae 377 LIST_HEAD(log_bufs);
3caa487f 378
db9ee220 379 if (jbd2_journal_has_csum_v2or3(journal))
3caa487f 380 csum_size = sizeof(struct jbd2_journal_block_tail);
470decc6
DK
381
382 /*
383 * First job: lock down the current transaction and wait for
384 * all outstanding updates to complete.
385 */
386
f7f4bccb
MC
387 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
388 if (journal->j_flags & JBD2_FLUSHED) {
cb3b3bf2 389 jbd2_debug(3, "super block updated\n");
6fa7aa50 390 mutex_lock_io(&journal->j_checkpoint_mutex);
79feb521
JK
391 /*
392 * We hold j_checkpoint_mutex so tail cannot change under us.
393 * We don't need any special data guarantees for writing sb
394 * since journal is empty and it is ok for write to be
395 * flushed only with transaction commit.
396 */
397 jbd2_journal_update_sb_log_tail(journal,
398 journal->j_tail_sequence,
6a3afb6a 399 journal->j_tail, 0);
a78bb11d 400 mutex_unlock(&journal->j_checkpoint_mutex);
470decc6 401 } else {
cb3b3bf2 402 jbd2_debug(3, "superblock not updated\n");
470decc6
DK
403 }
404
405 J_ASSERT(journal->j_running_transaction != NULL);
406 J_ASSERT(journal->j_committing_transaction == NULL);
407
ff780b91
HS
408 write_lock(&journal->j_state_lock);
409 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
410 while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
411 DEFINE_WAIT(wait);
412
413 prepare_to_wait(&journal->j_fc_wait, &wait,
414 TASK_UNINTERRUPTIBLE);
415 write_unlock(&journal->j_state_lock);
416 schedule();
417 write_lock(&journal->j_state_lock);
418 finish_wait(&journal->j_fc_wait, &wait);
cc80586a
HS
419 /*
420 * TODO: by blocking fast commits here, we are increasing
421 * fsync() latency slightly. Strictly speaking, we don't need
422 * to block fast commits until the transaction enters T_FLUSH
423 * state. So an optimization is possible where we block new fast
424 * commits here and wait for existing ones to complete
425 * just before we enter T_FLUSH. That way, the existing fast
426 * commits and this full commit can proceed parallely.
427 */
ff780b91
HS
428 }
429 write_unlock(&journal->j_state_lock);
430
470decc6 431 commit_transaction = journal->j_running_transaction;
470decc6 432
879c5e6b 433 trace_jbd2_start_commit(journal, commit_transaction);
cb3b3bf2 434 jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
470decc6
DK
435 commit_transaction->t_tid);
436
a931da6a 437 write_lock(&journal->j_state_lock);
ff780b91 438 journal->j_fc_off = 0;
3ca841c1 439 J_ASSERT(commit_transaction->t_state == T_RUNNING);
470decc6
DK
440 commit_transaction->t_state = T_LOCKED;
441
879c5e6b 442 trace_jbd2_commit_locking(journal, commit_transaction);
bf699327 443 stats.run.rs_wait = commit_transaction->t_max_wait;
9fff24aa 444 stats.run.rs_request_delay = 0;
bf699327 445 stats.run.rs_locked = jiffies;
9fff24aa
TT
446 if (commit_transaction->t_requested)
447 stats.run.rs_request_delay =
448 jbd2_time_diff(commit_transaction->t_requested,
449 stats.run.rs_locked);
bf699327
TT
450 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
451 stats.run.rs_locked);
8e85fb3f 452
4f981868
RH
453 // waits for any t_updates to finish
454 jbd2_journal_wait_updates(journal);
470decc6 455
96f1e097 456 commit_transaction->t_state = T_SWITCH;
470decc6 457
a51dca9c 458 J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
470decc6
DK
459 journal->j_max_transaction_buffers);
460
461 /*
462 * First thing we are allowed to do is to discard any remaining
463 * BJ_Reserved buffers. Note, it is _not_ permissible to assume
464 * that there are no such buffers: if a large filesystem
465 * operation like a truncate needs to split itself over multiple
f7f4bccb 466 * transactions, then it may try to do a jbd2_journal_restart() while
470decc6
DK
467 * there are still BJ_Reserved buffers outstanding. These must
468 * be released cleanly from the current transaction.
469 *
470 * In this case, the filesystem must still reserve write access
471 * again before modifying the buffer in the new transaction, but
472 * we do not require it to remember exactly which old buffers it
473 * has reserved. This is consistent with the existing behaviour
f7f4bccb 474 * that multiple jbd2_journal_get_write_access() calls to the same
25985edc 475 * buffer are perfectly permissible.
23e3d7f7
YB
476 * We use journal->j_state_lock here to serialize processing of
477 * t_reserved_list with eviction of buffers from journal_unmap_buffer().
470decc6
DK
478 */
479 while (commit_transaction->t_reserved_list) {
480 jh = commit_transaction->t_reserved_list;
481 JBUFFER_TRACE(jh, "reserved, unused: refile");
482 /*
f7f4bccb 483 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
470decc6
DK
484 * leave undo-committed data.
485 */
486 if (jh->b_committed_data) {
487 struct buffer_head *bh = jh2bh(jh);
488
46417064 489 spin_lock(&jh->b_state_lock);
af1e76d6 490 jbd2_free(jh->b_committed_data, bh->b_size);
470decc6 491 jh->b_committed_data = NULL;
46417064 492 spin_unlock(&jh->b_state_lock);
470decc6 493 }
f7f4bccb 494 jbd2_journal_refile_buffer(journal, jh);
470decc6
DK
495 }
496
23e3d7f7 497 write_unlock(&journal->j_state_lock);
470decc6
DK
498 /*
499 * Now try to drop any written-back buffers from the journal's
500 * checkpoint lists. We do this *before* commit because it potentially
501 * frees some memory
502 */
503 spin_lock(&journal->j_list_lock);
26770a71 504 __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
470decc6
DK
505 spin_unlock(&journal->j_list_lock);
506
cb3b3bf2 507 jbd2_debug(3, "JBD2: commit phase 1\n");
470decc6 508
1ba37268
YY
509 /*
510 * Clear revoked flag to reflect there is no revoked buffers
511 * in the next transaction which is going to be started.
512 */
513 jbd2_clear_buffer_revoked_flags(journal);
514
470decc6
DK
515 /*
516 * Switch to a new revoke table.
517 */
f7f4bccb 518 jbd2_journal_switch_revoke_table(journal);
470decc6 519
a89573ce 520 write_lock(&journal->j_state_lock);
8f7d89f3
JK
521 /*
522 * Reserved credits cannot be claimed anymore, free them
523 */
524 atomic_sub(atomic_read(&journal->j_reserved_credits),
525 &commit_transaction->t_outstanding_credits);
526
879c5e6b 527 trace_jbd2_commit_flushing(journal, commit_transaction);
bf699327
TT
528 stats.run.rs_flushing = jiffies;
529 stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
530 stats.run.rs_flushing);
8e85fb3f 531
470decc6
DK
532 commit_transaction->t_state = T_FLUSH;
533 journal->j_committing_transaction = commit_transaction;
534 journal->j_running_transaction = NULL;
e07f7183 535 start_time = ktime_get();
470decc6 536 commit_transaction->t_log_start = journal->j_head;
34fc8768 537 wake_up_all(&journal->j_wait_transaction_locked);
a931da6a 538 write_unlock(&journal->j_state_lock);
470decc6 539
cb3b3bf2 540 jbd2_debug(3, "JBD2: commit phase 2a\n");
470decc6 541
470decc6
DK
542 /*
543 * Now start flushing things to disk, in the order they appear
544 * on the transaction lists. Data blocks go first.
545 */
cd1aac32 546 err = journal_submit_data_buffers(journal, commit_transaction);
470decc6 547 if (err)
a7fa2baf 548 jbd2_journal_abort(journal, err);
470decc6 549
82f04ab4 550 blk_start_plug(&plug);
9bcf976c 551 jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
470decc6 552
cb3b3bf2 553 jbd2_debug(3, "JBD2: commit phase 2b\n");
470decc6 554
470decc6
DK
555 /*
556 * Way to go: we have now written out all of the data for a
557 * transaction! Now comes the tricky part: we need to write out
558 * metadata. Loop over the transaction's entire buffer list:
559 */
a931da6a 560 write_lock(&journal->j_state_lock);
470decc6 561 commit_transaction->t_state = T_COMMIT;
a931da6a 562 write_unlock(&journal->j_state_lock);
470decc6 563
879c5e6b 564 trace_jbd2_commit_logging(journal, commit_transaction);
bf699327
TT
565 stats.run.rs_logging = jiffies;
566 stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
567 stats.run.rs_logging);
9f356e5a 568 stats.run.rs_blocks = commit_transaction->t_nr_buffers;
bf699327 569 stats.run.rs_blocks_logged = 0;
8e85fb3f 570
1dfc3220 571 J_ASSERT(commit_transaction->t_nr_buffers <=
a51dca9c 572 atomic_read(&commit_transaction->t_outstanding_credits));
1dfc3220 573
470decc6 574 bufs = 0;
e5a120ae 575 descriptor = NULL;
470decc6
DK
576 while (commit_transaction->t_buffers) {
577
578 /* Find the next buffer to be journaled... */
579
580 jh = commit_transaction->t_buffers;
581
582 /* If we're in abort mode, we just un-journal the buffer and
7ad7445f 583 release it. */
470decc6
DK
584
585 if (is_journal_aborted(journal)) {
7ad7445f 586 clear_buffer_jbddirty(jh2bh(jh));
470decc6 587 JBUFFER_TRACE(jh, "journal is aborting: refile");
e06c8227
JB
588 jbd2_buffer_abort_trigger(jh,
589 jh->b_frozen_data ?
590 jh->b_frozen_triggers :
591 jh->b_triggers);
f7f4bccb 592 jbd2_journal_refile_buffer(journal, jh);
470decc6
DK
593 /* If that was the last one, we need to clean up
594 * any descriptor buffers which may have been
595 * already allocated, even if we are now
596 * aborting. */
597 if (!commit_transaction->t_buffers)
598 goto start_journal_io;
599 continue;
600 }
601
602 /* Make sure we have a descriptor block in which to
603 record the metadata buffer. */
604
605 if (!descriptor) {
470decc6
DK
606 J_ASSERT (bufs == 0);
607
cb3b3bf2 608 jbd2_debug(4, "JBD2: get descriptor\n");
470decc6 609
32ab6715
JK
610 descriptor = jbd2_journal_get_descriptor_buffer(
611 commit_transaction,
612 JBD2_DESCRIPTOR_BLOCK);
470decc6 613 if (!descriptor) {
a7fa2baf 614 jbd2_journal_abort(journal, -EIO);
470decc6
DK
615 continue;
616 }
617
cb3b3bf2 618 jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
e5a120ae
JK
619 (unsigned long long)descriptor->b_blocknr,
620 descriptor->b_data);
e5a120ae
JK
621 tagp = &descriptor->b_data[sizeof(journal_header_t)];
622 space_left = descriptor->b_size -
623 sizeof(journal_header_t);
470decc6 624 first_tag = 1;
e5a120ae
JK
625 set_buffer_jwrite(descriptor);
626 set_buffer_dirty(descriptor);
627 wbuf[bufs++] = descriptor;
470decc6
DK
628
629 /* Record it so that we can wait for IO
630 completion later */
e5a120ae
JK
631 BUFFER_TRACE(descriptor, "ph3: file as descriptor");
632 jbd2_file_log_bh(&log_bufs, descriptor);
470decc6
DK
633 }
634
635 /* Where is the buffer to be written? */
636
f7f4bccb 637 err = jbd2_journal_next_log_block(journal, &blocknr);
470decc6
DK
638 /* If the block mapping failed, just abandon the buffer
639 and repeat this loop: we'll fall into the
640 refile-on-abort condition above. */
641 if (err) {
a7fa2baf 642 jbd2_journal_abort(journal, err);
470decc6
DK
643 continue;
644 }
645
646 /*
647 * start_this_handle() uses t_outstanding_credits to determine
0db45889 648 * the free space in the log.
470decc6 649 */
a51dca9c 650 atomic_dec(&commit_transaction->t_outstanding_credits);
470decc6
DK
651
652 /* Bump b_count to prevent truncate from stumbling over
653 the shadowed buffer! @@@ This can go if we ever get
f5113eff 654 rid of the shadow pairing of buffers. */
470decc6
DK
655 atomic_inc(&jh2bh(jh)->b_count);
656
470decc6 657 /*
f5113eff
JK
658 * Make a temporary IO buffer with which to write it out
659 * (this will requeue the metadata buffer to BJ_Shadow).
470decc6 660 */
f5113eff 661 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
470decc6 662 JBUFFER_TRACE(jh, "ph3: write metadata");
abe48a52 663 escape = jbd2_journal_write_metadata_buffer(commit_transaction,
f5113eff 664 jh, &wbuf[bufs], blocknr);
f5113eff 665 jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
470decc6
DK
666
667 /* Record the new block's tag in the current descriptor
668 buffer */
669
670 tag_flag = 0;
abe48a52 671 if (escape)
f7f4bccb 672 tag_flag |= JBD2_FLAG_ESCAPE;
470decc6 673 if (!first_tag)
f7f4bccb 674 tag_flag |= JBD2_FLAG_SAME_UUID;
470decc6
DK
675
676 tag = (journal_block_tag_t *) tagp;
db9ee220 677 write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
8f888ef8 678 tag->t_flags = cpu_to_be16(tag_flag);
f5113eff 679 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
c3900875 680 commit_transaction->t_tid);
b517bea1
ZB
681 tagp += tag_bytes;
682 space_left -= tag_bytes;
f5113eff 683 bufs++;
470decc6
DK
684
685 if (first_tag) {
686 memcpy (tagp, journal->j_uuid, 16);
687 tagp += 16;
688 space_left -= 16;
689 first_tag = 0;
690 }
691
692 /* If there's no more to do, or if the descriptor is full,
693 let the IO rip! */
694
695 if (bufs == journal->j_wbufsize ||
696 commit_transaction->t_buffers == NULL ||
3caa487f 697 space_left < tag_bytes + 16 + csum_size) {
470decc6 698
cb3b3bf2 699 jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
470decc6
DK
700
701 /* Write an end-of-descriptor marker before
702 submitting the IOs. "tag" still points to
703 the last tag we set up. */
704
8f888ef8 705 tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
470decc6 706start_journal_io:
6e876c3d 707 if (descriptor)
708 jbd2_descriptor_block_csum_set(journal,
709 descriptor);
710
470decc6
DK
711 for (i = 0; i < bufs; i++) {
712 struct buffer_head *bh = wbuf[i];
6a3afb6a 713
818d276c
GS
714 /*
715 * Compute checksum.
716 */
56316a0d 717 if (jbd2_has_feature_checksum(journal)) {
818d276c
GS
718 crc32_sum =
719 jbd2_checksum_data(crc32_sum, bh);
720 }
721
470decc6
DK
722 lock_buffer(bh);
723 clear_buffer_dirty(bh);
724 set_buffer_uptodate(bh);
725 bh->b_end_io = journal_end_buffer_io_sync;
6a3afb6a
ZY
726 submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
727 bh);
470decc6
DK
728 }
729 cond_resched();
730
731 /* Force a new descriptor to be generated next
732 time round the loop. */
733 descriptor = NULL;
734 bufs = 0;
735 }
736 }
737
f73bee49
JK
738 err = journal_finish_inode_data_buffers(journal, commit_transaction);
739 if (err) {
740 printk(KERN_WARNING
6e969ef3
BL
741 "JBD2: Detected IO errors %d while flushing file data on %s\n",
742 err, journal->j_devname);
f73bee49
JK
743 err = 0;
744 }
745
3339578f
JK
746 /*
747 * Get current oldest transaction in the log before we issue flush
748 * to the filesystem device. After the flush we can be sure that
749 * blocks of all older transactions are checkpointed to persistent
750 * storage and we will be safe to update journal start in the
751 * superblock with the numbers we get here.
752 */
753 update_tail =
754 jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
755
bbd2be36 756 write_lock(&journal->j_state_lock);
3339578f
JK
757 if (update_tail) {
758 long freed = first_block - journal->j_tail;
759
760 if (first_block < journal->j_tail)
761 freed += journal->j_last - journal->j_first;
762 /* Update tail only if we free significant amount of space */
4aa99c71 763 if (freed < journal->j_max_transaction_buffers)
3339578f
JK
764 update_tail = 0;
765 }
bbd2be36
JK
766 J_ASSERT(commit_transaction->t_state == T_COMMIT);
767 commit_transaction->t_state = T_COMMIT_DFLUSH;
768 write_unlock(&journal->j_state_lock);
3339578f 769
4f981868 770 /*
cc3e1bea
TT
771 * If the journal is not located on the file system device,
772 * then we must flush the file system device before we issue
a0851ea9 773 * the commit record and update the journal tail sequence.
cc3e1bea 774 */
a0851ea9 775 if ((commit_transaction->t_need_data_flush || update_tail) &&
cc3e1bea
TT
776 (journal->j_fs_dev != journal->j_dev) &&
777 (journal->j_flags & JBD2_BARRIER))
c6bf3f0e 778 blkdev_issue_flush(journal->j_fs_dev);
818d276c 779
cc3e1bea 780 /* Done it all: now write the commit record asynchronously. */
56316a0d 781 if (jbd2_has_feature_async_commit(journal)) {
818d276c
GS
782 err = journal_submit_commit_record(journal, commit_transaction,
783 &cbh, crc32_sum);
784 if (err)
d0a186e0 785 jbd2_journal_abort(journal, err);
e9e34f4e 786 }
c851ed54 787
82f04ab4
JA
788 blk_finish_plug(&plug);
789
470decc6
DK
790 /* Lo and behold: we have just managed to send a transaction to
791 the log. Before we can commit it, wait for the IO so far to
792 complete. Control buffers being written are on the
793 transaction's t_log_list queue, and metadata buffers are on
f5113eff 794 the io_bufs list.
470decc6
DK
795
796 Wait for the buffers in reverse order. That way we are
797 less likely to be woken up until all IOs have completed, and
798 so we incur less scheduling load.
799 */
800
cb3b3bf2 801 jbd2_debug(3, "JBD2: commit phase 3\n");
470decc6 802
f5113eff
JK
803 while (!list_empty(&io_bufs)) {
804 struct buffer_head *bh = list_entry(io_bufs.prev,
805 struct buffer_head,
806 b_assoc_buffers);
470decc6 807
f5113eff
JK
808 wait_on_buffer(bh);
809 cond_resched();
470decc6
DK
810
811 if (unlikely(!buffer_uptodate(bh)))
812 err = -EIO;
f5113eff 813 jbd2_unfile_log_bh(bh);
015c6033 814 stats.run.rs_blocks_logged++;
470decc6
DK
815
816 /*
f5113eff
JK
817 * The list contains temporary buffer heads created by
818 * jbd2_journal_write_metadata_buffer().
470decc6
DK
819 */
820 BUFFER_TRACE(bh, "dumping temporary bh");
470decc6
DK
821 __brelse(bh);
822 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
823 free_buffer_head(bh);
824
f5113eff 825 /* We also have to refile the corresponding shadowed buffer */
470decc6
DK
826 jh = commit_transaction->t_shadow_list->b_tprev;
827 bh = jh2bh(jh);
f5113eff 828 clear_buffer_jwrite(bh);
470decc6 829 J_ASSERT_BH(bh, buffer_jbddirty(bh));
b34090e5 830 J_ASSERT_BH(bh, !buffer_shadow(bh));
470decc6
DK
831
832 /* The metadata is now released for reuse, but we need
833 to remember it against this transaction so that when
834 we finally commit, we can do any checkpointing
835 required. */
836 JBUFFER_TRACE(jh, "file as BJ_Forget");
f7f4bccb 837 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
470decc6
DK
838 JBUFFER_TRACE(jh, "brelse shadowed buffer");
839 __brelse(bh);
840 }
841
842 J_ASSERT (commit_transaction->t_shadow_list == NULL);
843
cb3b3bf2 844 jbd2_debug(3, "JBD2: commit phase 4\n");
470decc6
DK
845
846 /* Here we wait for the revoke record and descriptor record buffers */
e5a120ae 847 while (!list_empty(&log_bufs)) {
470decc6
DK
848 struct buffer_head *bh;
849
e5a120ae
JK
850 bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
851 wait_on_buffer(bh);
852 cond_resched();
470decc6
DK
853
854 if (unlikely(!buffer_uptodate(bh)))
855 err = -EIO;
856
857 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
858 clear_buffer_jwrite(bh);
e5a120ae 859 jbd2_unfile_log_bh(bh);
015c6033 860 stats.run.rs_blocks_logged++;
470decc6
DK
861 __brelse(bh); /* One for getblk */
862 /* AKPM: bforget here */
863 }
864
77e841de
HK
865 if (err)
866 jbd2_journal_abort(journal, err);
867
cb3b3bf2 868 jbd2_debug(3, "JBD2: commit phase 5\n");
bbd2be36
JK
869 write_lock(&journal->j_state_lock);
870 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
871 commit_transaction->t_state = T_COMMIT_JFLUSH;
872 write_unlock(&journal->j_state_lock);
470decc6 873
56316a0d 874 if (!jbd2_has_feature_async_commit(journal)) {
818d276c
GS
875 err = journal_submit_commit_record(journal, commit_transaction,
876 &cbh, crc32_sum);
877 if (err)
d0a186e0 878 jbd2_journal_abort(journal, err);
818d276c 879 }
6cba611e 880 if (cbh)
fd98496f 881 err = journal_wait_on_commit_record(journal, cbh);
015c6033 882 stats.run.rs_blocks_logged++;
56316a0d 883 if (jbd2_has_feature_async_commit(journal) &&
f73bee49 884 journal->j_flags & JBD2_BARRIER) {
c6bf3f0e 885 blkdev_issue_flush(journal->j_dev);
f73bee49 886 }
470decc6
DK
887
888 if (err)
a7fa2baf 889 jbd2_journal_abort(journal, err);
470decc6 890
9f356e5a
JK
891 WARN_ON_ONCE(
892 atomic_read(&commit_transaction->t_outstanding_credits) < 0);
893
3339578f
JK
894 /*
895 * Now disk caches for filesystem device are flushed so we are safe to
896 * erase checkpointed transactions from the log by updating journal
897 * superblock.
898 */
899 if (update_tail)
900 jbd2_update_log_tail(journal, first_tid, first_block);
901
470decc6
DK
902 /* End of a transaction! Finally, we can do checkpoint
903 processing: any buffers committed as a result of this
904 transaction can be removed from any checkpoint list it was on
905 before. */
906
cb3b3bf2 907 jbd2_debug(3, "JBD2: commit phase 6\n");
470decc6 908
c851ed54 909 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
470decc6
DK
910 J_ASSERT(commit_transaction->t_buffers == NULL);
911 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
470decc6 912 J_ASSERT(commit_transaction->t_shadow_list == NULL);
470decc6
DK
913
914restart_loop:
915 /*
916 * As there are other places (journal_unmap_buffer()) adding buffers
917 * to this list we have to be careful and hold the j_list_lock.
918 */
919 spin_lock(&journal->j_list_lock);
920 while (commit_transaction->t_forget) {
921 transaction_t *cp_transaction;
922 struct buffer_head *bh;
de1b7941 923 int try_to_free = 0;
93108ebb 924 bool drop_ref;
470decc6
DK
925
926 jh = commit_transaction->t_forget;
927 spin_unlock(&journal->j_list_lock);
928 bh = jh2bh(jh);
de1b7941
JK
929 /*
930 * Get a reference so that bh cannot be freed before we are
931 * done with it.
932 */
933 get_bh(bh);
46417064 934 spin_lock(&jh->b_state_lock);
23e2af35 935 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
470decc6
DK
936
937 /*
938 * If there is undo-protected committed data against
939 * this buffer, then we can remove it now. If it is a
940 * buffer needing such protection, the old frozen_data
941 * field now points to a committed version of the
942 * buffer, so rotate that field to the new committed
943 * data.
944 *
945 * Otherwise, we can just throw away the frozen data now.
e06c8227
JB
946 *
947 * We also know that the frozen data has already fired
948 * its triggers if they exist, so we can clear that too.
470decc6
DK
949 */
950 if (jh->b_committed_data) {
af1e76d6 951 jbd2_free(jh->b_committed_data, bh->b_size);
470decc6
DK
952 jh->b_committed_data = NULL;
953 if (jh->b_frozen_data) {
954 jh->b_committed_data = jh->b_frozen_data;
955 jh->b_frozen_data = NULL;
e06c8227 956 jh->b_frozen_triggers = NULL;
470decc6
DK
957 }
958 } else if (jh->b_frozen_data) {
af1e76d6 959 jbd2_free(jh->b_frozen_data, bh->b_size);
470decc6 960 jh->b_frozen_data = NULL;
e06c8227 961 jh->b_frozen_triggers = NULL;
470decc6
DK
962 }
963
964 spin_lock(&journal->j_list_lock);
965 cp_transaction = jh->b_cp_transaction;
966 if (cp_transaction) {
967 JBUFFER_TRACE(jh, "remove from old cp transaction");
8e85fb3f 968 cp_transaction->t_chp_stats.cs_dropped++;
f7f4bccb 969 __jbd2_journal_remove_checkpoint(jh);
470decc6
DK
970 }
971
972 /* Only re-checkpoint the buffer_head if it is marked
973 * dirty. If the buffer was added to the BJ_Forget list
f7f4bccb 974 * by jbd2_journal_forget, it may no longer be dirty and
470decc6
DK
975 * there's no point in keeping a checkpoint record for
976 * it. */
977
b794e7a6 978 /*
6a66a7de 979 * A buffer which has been freed while still being journaled
980 * by a previous transaction, refile the buffer to BJ_Forget of
981 * the running transaction. If the just committed transaction
982 * contains "add to orphan" operation, we can completely
983 * invalidate the buffer now. We are rather thorough in that
984 * since the buffer may be still accessible when blocksize <
985 * pagesize and it is attached to the last partial page.
986 */
987 if (buffer_freed(bh) && !jh->b_next_transaction) {
c96dceea 988 struct address_space *mapping;
989
6a66a7de 990 clear_buffer_freed(bh);
991 clear_buffer_jbddirty(bh);
c96dceea 992
993 /*
994 * Block device buffers need to stay mapped all the
995 * time, so it is enough to clear buffer_jbddirty and
996 * buffer_freed bits. For the file mapping buffers (i.e.
997 * journalled data) we need to unmap buffer and clear
998 * more bits. We also need to be careful about the check
999 * because the data page mapping can get cleared under
780f66e5 1000 * our hands. Note that if mapping == NULL, we don't
1001 * need to make buffer unmapped because the page is
1002 * already detached from the mapping and buffers cannot
1003 * get reused.
c96dceea 1004 */
0d22fe2f 1005 mapping = READ_ONCE(bh->b_folio->mapping);
c96dceea 1006 if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
1007 clear_buffer_mapped(bh);
1008 clear_buffer_new(bh);
1009 clear_buffer_req(bh);
1010 bh->b_bdev = NULL;
1011 }
470decc6
DK
1012 }
1013
1014 if (buffer_jbddirty(bh)) {
1015 JBUFFER_TRACE(jh, "add to new checkpointing trans");
f7f4bccb 1016 __jbd2_journal_insert_checkpoint(jh, commit_transaction);
7ad7445f
HK
1017 if (is_journal_aborted(journal))
1018 clear_buffer_jbddirty(bh);
470decc6
DK
1019 } else {
1020 J_ASSERT_BH(bh, !buffer_dirty(bh));
de1b7941
JK
1021 /*
1022 * The buffer on BJ_Forget list and not jbddirty means
470decc6
DK
1023 * it has been freed by this transaction and hence it
1024 * could not have been reallocated until this
1025 * transaction has committed. *BUT* it could be
1026 * reallocated once we have written all the data to
1027 * disk and before we process the buffer on BJ_Forget
de1b7941
JK
1028 * list.
1029 */
1030 if (!jh->b_next_transaction)
1031 try_to_free = 1;
470decc6 1032 }
de1b7941 1033 JBUFFER_TRACE(jh, "refile or unfile buffer");
93108ebb 1034 drop_ref = __jbd2_journal_refile_buffer(jh);
46417064 1035 spin_unlock(&jh->b_state_lock);
93108ebb
JK
1036 if (drop_ref)
1037 jbd2_journal_put_journal_head(jh);
de1b7941
JK
1038 if (try_to_free)
1039 release_buffer_page(bh); /* Drops bh reference */
1040 else
1041 __brelse(bh);
470decc6
DK
1042 cond_resched_lock(&journal->j_list_lock);
1043 }
1044 spin_unlock(&journal->j_list_lock);
1045 /*
f5a7a6b0
JK
1046 * This is a bit sleazy. We use j_list_lock to protect transition
1047 * of a transaction into T_FINISHED state and calling
1048 * __jbd2_journal_drop_transaction(). Otherwise we could race with
1049 * other checkpointing code processing the transaction...
470decc6 1050 */
a931da6a 1051 write_lock(&journal->j_state_lock);
470decc6
DK
1052 spin_lock(&journal->j_list_lock);
1053 /*
1054 * Now recheck if some buffers did not get attached to the transaction
1055 * while the lock was dropped...
1056 */
1057 if (commit_transaction->t_forget) {
1058 spin_unlock(&journal->j_list_lock);
a931da6a 1059 write_unlock(&journal->j_state_lock);
470decc6
DK
1060 goto restart_loop;
1061 }
1062
d4e839d4
TT
1063 /* Add the transaction to the checkpoint list.
1064 * __jbd2_journal_remove_checkpoint() cannot destroy the transaction
1065 * under us because it is not marked as T_FINISHED yet. */
1066 if (journal->j_checkpoint_transactions == NULL) {
1067 journal->j_checkpoint_transactions = commit_transaction;
1068 commit_transaction->t_cpnext = commit_transaction;
1069 commit_transaction->t_cpprev = commit_transaction;
1070 } else {
1071 commit_transaction->t_cpnext =
1072 journal->j_checkpoint_transactions;
1073 commit_transaction->t_cpprev =
1074 commit_transaction->t_cpnext->t_cpprev;
1075 commit_transaction->t_cpnext->t_cpprev =
1076 commit_transaction;
1077 commit_transaction->t_cpprev->t_cpnext =
1078 commit_transaction;
1079 }
1080 spin_unlock(&journal->j_list_lock);
1081
470decc6
DK
1082 /* Done with this transaction! */
1083
cb3b3bf2 1084 jbd2_debug(3, "JBD2: commit phase 7\n");
470decc6 1085
bbd2be36 1086 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
470decc6 1087
8e85fb3f 1088 commit_transaction->t_start = jiffies;
bf699327
TT
1089 stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
1090 commit_transaction->t_start);
8e85fb3f
JL
1091
1092 /*
bf699327 1093 * File the transaction statistics
8e85fb3f 1094 */
8e85fb3f 1095 stats.ts_tid = commit_transaction->t_tid;
8dd42046
TT
1096 stats.run.rs_handle_count =
1097 atomic_read(&commit_transaction->t_handle_count);
bf699327
TT
1098 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1099 commit_transaction->t_tid, &stats.run);
42cf3452 1100 stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
8e85fb3f 1101
794446c6 1102 commit_transaction->t_state = T_COMMIT_CALLBACK;
470decc6 1103 J_ASSERT(commit_transaction == journal->j_committing_transaction);
7c73ddb7 1104 WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
470decc6 1105 journal->j_committing_transaction = NULL;
e07f7183 1106 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
470decc6 1107
e07f7183
JB
1108 /*
1109 * weight the commit time higher than the average time so we don't
1110 * react too strongly to vast changes in the commit time
1111 */
1112 if (likely(journal->j_average_commit_time))
1113 journal->j_average_commit_time = (commit_time +
1114 journal->j_average_commit_time*3) / 4;
1115 else
1116 journal->j_average_commit_time = commit_time;
794446c6 1117
a931da6a 1118 write_unlock(&journal->j_state_lock);
6c20ec85 1119
fb68407b
AK
1120 if (journal->j_commit_callback)
1121 journal->j_commit_callback(journal, commit_transaction);
ff780b91 1122 if (journal->j_fc_cleanup_callback)
e85c81ba 1123 journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
fb68407b 1124
879c5e6b 1125 trace_jbd2_end_commit(journal, commit_transaction);
cb3b3bf2 1126 jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
470decc6
DK
1127 journal->j_commit_sequence, journal->j_tail_sequence);
1128
794446c6 1129 write_lock(&journal->j_state_lock);
ff780b91
HS
1130 journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
1131 journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
794446c6
DM
1132 spin_lock(&journal->j_list_lock);
1133 commit_transaction->t_state = T_FINISHED;
d4e839d4 1134 /* Check if the transaction can be dropped now that we are finished */
be222553 1135 if (commit_transaction->t_checkpoint_list == NULL) {
794446c6
DM
1136 __jbd2_journal_drop_transaction(journal, commit_transaction);
1137 jbd2_journal_free_transaction(commit_transaction);
1138 }
1139 spin_unlock(&journal->j_list_lock);
1140 write_unlock(&journal->j_state_lock);
470decc6 1141 wake_up(&journal->j_wait_done_commit);
ff780b91 1142 wake_up(&journal->j_fc_wait);
42cf3452
TT
1143
1144 /*
1145 * Calculate overall stats
1146 */
1147 spin_lock(&journal->j_history_lock);
1148 journal->j_stats.ts_tid++;
1149 journal->j_stats.ts_requested += stats.ts_requested;
1150 journal->j_stats.run.rs_wait += stats.run.rs_wait;
1151 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
1152 journal->j_stats.run.rs_running += stats.run.rs_running;
1153 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1154 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
1155 journal->j_stats.run.rs_logging += stats.run.rs_logging;
1156 journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
1157 journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
1158 journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
1159 spin_unlock(&journal->j_history_lock);
470decc6 1160}