Commit | Line | Data |
---|---|---|
6866d7b3 HS |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | /* | |
4 | * fs/ext4/fast_commit.c | |
5 | * | |
6 | * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com> | |
7 | * | |
8 | * Ext4 fast commits routines. | |
9 | */ | |
aa75f4d3 | 10 | #include "ext4.h" |
6866d7b3 | 11 | #include "ext4_jbd2.h" |
aa75f4d3 HS |
12 | #include "ext4_extents.h" |
13 | #include "mballoc.h" | |
14 | ||
15 | /* | |
16 | * Ext4 Fast Commits | |
17 | * ----------------- | |
18 | * | |
19 | * Ext4 fast commits implement fine grained journalling for Ext4. | |
20 | * | |
21 | * Fast commits are organized as a log of tag-length-value (TLV) structs. (See | |
22 | * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by | |
23 | * TLV during the recovery phase. For the scenarios for which we currently | |
24 | * don't have replay code, fast commit falls back to full commits. | |
25 | * Fast commits record delta in one of the following three categories. | |
26 | * | |
27 | * (A) Directory entry updates: | |
28 | * | |
29 | * - EXT4_FC_TAG_UNLINK - records directory entry unlink | |
30 | * - EXT4_FC_TAG_LINK - records directory entry link | |
31 | * - EXT4_FC_TAG_CREAT - records inode and directory entry creation | |
32 | * | |
33 | * (B) File specific data range updates: | |
34 | * | |
35 | * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode | |
36 | * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode | |
37 | * | |
38 | * (C) Inode metadata (mtime / ctime etc): | |
39 | * | |
40 | * - EXT4_FC_TAG_INODE - record the inode that should be replayed | |
41 | * during recovery. Note that iblocks field is | |
42 | * not replayed and instead derived during | |
43 | * replay. | |
44 | * Commit Operation | |
45 | * ---------------- | |
46 | * With fast commits, we maintain all the directory entry operations in the | |
47 | * order in which they are issued in an in-memory queue. This queue is flushed | |
48 | * to disk during the commit operation. We also maintain a list of inodes | |
49 | * that need to be committed during a fast commit in another in memory queue of | |
50 | * inodes. During the commit operation, we commit in the following order: | |
51 | * | |
52 | * [1] Lock inodes for any further data updates by setting COMMITTING state | |
53 | * [2] Submit data buffers of all the inodes | |
54 | * [3] Wait for [2] to complete | |
55 | * [4] Commit all the directory entry updates in the fast commit space | |
56 | * [5] Commit all the changed inode structures | |
57 | * [6] Write tail tag (this tag ensures the atomicity, please read the following | |
58 | * section for more details). | |
59 | * [7] Wait for [4], [5] and [6] to complete. | |
60 | * | |
61 | * All the inode updates must call ext4_fc_start_update() before starting an | |
62 | * update. If such an ongoing update is present, fast commit waits for it to | |
63 | * complete. The completion of such an update is marked by | |
64 | * ext4_fc_stop_update(). | |
65 | * | |
66 | * Fast Commit Ineligibility | |
67 | * ------------------------- | |
aa75f4d3 | 68 | * |
7bbbe241 HS |
69 | * Not all operations are supported by fast commits today (e.g. extended |
70 | * attributes). Fast commit ineligibility is marked by calling | |
71 | * ext4_fc_mark_ineligible(): This makes the next fast commit operation fall back | |
72 | * to full commit. | |
aa75f4d3 HS |
73 | * |
74 | * Atomicity of commits | |
75 | * -------------------- | |
a740762f | 76 | * In order to guarantee atomicity during the commit operation, fast commit |
aa75f4d3 HS |
77 | * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail |
78 | * tag contains CRC of the contents and TID of the transaction after which | |
79 | * this fast commit should be applied. Recovery code replays fast commit | |
80 | * logs only if there's at least 1 valid tail present. For every fast commit | |
81 | * operation, there is 1 tail. This means, we may end up with multiple tails | |
82 | * in the fast commit space. Here's an example: | |
83 | * | |
84 | * - Create a new file A and remove existing file B | |
85 | * - fsync() | |
86 | * - Append contents to file A | |
87 | * - Truncate file A | |
88 | * - fsync() | |
89 | * | |
90 | * The fast commit space at the end of above operations would look like this: | |
91 | * [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL] | |
92 | * |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->| | |
93 | * | |
94 | * Replay code should thus check for all the valid tails in the FC area. | |
95 | * | |
b1b7dce3 HS |
96 | * Fast Commit Replay Idempotence |
97 | * ------------------------------ | |
98 | * | |
99 | * Fast commits tags are idempotent in nature provided the recovery code follows | |
100 | * certain rules. The guiding principle that the commit path follows while | |
101 | * committing is that it stores the result of a particular operation instead of | |
102 | * storing the procedure. | |
103 | * | |
104 | * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a' | |
105 | * was associated with inode 10. During fast commit, instead of storing this | |
106 | * operation as a procedure "rename a to b", we store the resulting file system | |
107 | * state as a "series" of outcomes: | |
108 | * | |
109 | * - Link dirent b to inode 10 | |
110 | * - Unlink dirent a | |
111 | * - Inode <10> with valid refcount | |
112 | * | |
113 | * Now when recovery code runs, it needs to "enforce" this state on the file | |
114 | * system. This is what guarantees idempotence of fast commit replay. | |
115 | * | |
116 | * Let's take an example of a procedure that is not idempotent and see how fast | |
117 | * commits make it idempotent. Consider following sequence of operations: | |
118 | * | |
119 | * rm A; mv B A; read A | |
120 | * (x) (y) (z) | |
121 | * | |
122 | * (x), (y) and (z) are the points at which we can crash. If we store this | |
123 | * sequence of operations as is then the replay is not idempotent. Let's say | |
124 | * while in replay, we crash at (z). During the second replay, file A (which was | |
125 | * actually created as a result of "mv B A" operation) would get deleted. Thus, | |
126 | * file named A would be absent when we try to read A. So, this sequence of | |
127 | * operations is not idempotent. However, as mentioned above, instead of storing | |
128 | * the procedure fast commits store the outcome of each procedure. Thus the fast | |
129 | * commit log for above procedure would be as follows: | |
130 | * | |
131 | * (Let's assume dirent A was linked to inode 10 and dirent B was linked to | |
132 | * inode 11 before the replay) | |
133 | * | |
134 | * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11] | |
135 | * (w) (x) (y) (z) | |
136 | * | |
137 | * If we crash at (z), we will have file A linked to inode 11. During the second | |
138 | * replay, we will remove file A (inode 11). But we will create it back and make | |
139 | * it point to inode 11. We won't find B, so we'll just skip that step. At this | |
140 | * point, the refcount for inode 11 is not reliable, but that gets fixed by the | |
141 | * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled | |
142 | * similarly. Thus, by converting a non-idempotent procedure into a series of | |
143 | * idempotent outcomes, fast commits ensured idempotence during the replay. | |
144 | * | |
aa75f4d3 HS |
145 | * TODOs |
146 | * ----- | |
b1b7dce3 HS |
147 | * |
148 | * 0) Fast commit replay path hardening: Fast commit replay code should use | |
149 | * journal handles to make sure all the updates it does during the replay | |
150 | * path are atomic. With that if we crash during fast commit replay, after | |
151 | * trying to do recovery again, we will find a file system where fast commit | |
152 | * area is invalid (because new full commit would be found). In order to deal | |
153 | * with that, fast commit replay code should ensure that the "FC_REPLAY" | |
154 | * superblock state is persisted before starting the replay, so that after | |
155 | * the crash, fast commit recovery code can look at that flag and perform | |
156 | * fast commit recovery even if that area is invalidated by later full | |
157 | * commits. | |
158 | * | |
d1199b94 HS |
159 | * 1) Fast commit's commit path locks the entire file system during fast |
160 | * commit. This has significant performance penalty. Instead of that, we | |
161 | * should use ext4_fc_start/stop_update functions to start inode level | |
162 | * updates from ext4_journal_start/stop. Once we do that we can drop file | |
163 | * system locking during commit path. | |
aa75f4d3 | 164 | * |
d1199b94 | 165 | * 2) Handle more ineligible cases. |
aa75f4d3 HS |
166 | */ |
167 | ||
168 | #include <trace/events/ext4.h> | |
169 | static struct kmem_cache *ext4_fc_dentry_cachep; | |
170 | ||
171 | static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |
172 | { | |
173 | BUFFER_TRACE(bh, ""); | |
174 | if (uptodate) { | |
175 | ext4_debug("%s: Block %lld up-to-date", | |
176 | __func__, bh->b_blocknr); | |
177 | set_buffer_uptodate(bh); | |
178 | } else { | |
179 | ext4_debug("%s: Block %lld not up-to-date", | |
180 | __func__, bh->b_blocknr); | |
181 | clear_buffer_uptodate(bh); | |
182 | } | |
183 | ||
184 | unlock_buffer(bh); | |
185 | } | |
186 | ||
187 | static inline void ext4_fc_reset_inode(struct inode *inode) | |
188 | { | |
189 | struct ext4_inode_info *ei = EXT4_I(inode); | |
190 | ||
191 | ei->i_fc_lblk_start = 0; | |
192 | ei->i_fc_lblk_len = 0; | |
193 | } | |
194 | ||
195 | void ext4_fc_init_inode(struct inode *inode) | |
196 | { | |
197 | struct ext4_inode_info *ei = EXT4_I(inode); | |
198 | ||
199 | ext4_fc_reset_inode(inode); | |
200 | ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); | |
201 | INIT_LIST_HEAD(&ei->i_fc_list); | |
b3998b3b | 202 | INIT_LIST_HEAD(&ei->i_fc_dilist); |
aa75f4d3 HS |
203 | init_waitqueue_head(&ei->i_fc_wait); |
204 | atomic_set(&ei->i_fc_updates, 0); | |
aa75f4d3 HS |
205 | } |
206 | ||
f6634e26 HS |
207 | /* This function must be called with sbi->s_fc_lock held. */ |
208 | static void ext4_fc_wait_committing_inode(struct inode *inode) | |
fa329e27 | 209 | __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) |
f6634e26 HS |
210 | { |
211 | wait_queue_head_t *wq; | |
212 | struct ext4_inode_info *ei = EXT4_I(inode); | |
213 | ||
214 | #if (BITS_PER_LONG < 64) | |
215 | DEFINE_WAIT_BIT(wait, &ei->i_state_flags, | |
216 | EXT4_STATE_FC_COMMITTING); | |
217 | wq = bit_waitqueue(&ei->i_state_flags, | |
218 | EXT4_STATE_FC_COMMITTING); | |
219 | #else | |
220 | DEFINE_WAIT_BIT(wait, &ei->i_flags, | |
221 | EXT4_STATE_FC_COMMITTING); | |
222 | wq = bit_waitqueue(&ei->i_flags, | |
223 | EXT4_STATE_FC_COMMITTING); | |
224 | #endif | |
225 | lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); | |
226 | prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); | |
227 | spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); | |
228 | schedule(); | |
229 | finish_wait(wq, &wait.wq_entry); | |
230 | } | |
231 | ||
b7b80a35 YB |
232 | static bool ext4_fc_disabled(struct super_block *sb) |
233 | { | |
234 | return (!test_opt2(sb, JOURNAL_FAST_COMMIT) || | |
235 | (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); | |
236 | } | |
237 | ||
aa75f4d3 HS |
238 | /* |
239 | * Inform Ext4's fast about start of an inode update | |
240 | * | |
241 | * This function is called by the high level call VFS callbacks before | |
242 | * performing any inode update. This function blocks if there's an ongoing | |
243 | * fast commit on the inode in question. | |
244 | */ | |
245 | void ext4_fc_start_update(struct inode *inode) | |
246 | { | |
247 | struct ext4_inode_info *ei = EXT4_I(inode); | |
248 | ||
b7b80a35 | 249 | if (ext4_fc_disabled(inode->i_sb)) |
aa75f4d3 HS |
250 | return; |
251 | ||
252 | restart: | |
253 | spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); | |
254 | if (list_empty(&ei->i_fc_list)) | |
255 | goto out; | |
256 | ||
257 | if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { | |
f6634e26 | 258 | ext4_fc_wait_committing_inode(inode); |
aa75f4d3 HS |
259 | goto restart; |
260 | } | |
261 | out: | |
262 | atomic_inc(&ei->i_fc_updates); | |
263 | spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); | |
264 | } | |
265 | ||
266 | /* | |
267 | * Stop inode update and wake up waiting fast commits if any. | |
268 | */ | |
269 | void ext4_fc_stop_update(struct inode *inode) | |
270 | { | |
271 | struct ext4_inode_info *ei = EXT4_I(inode); | |
272 | ||
b7b80a35 | 273 | if (ext4_fc_disabled(inode->i_sb)) |
aa75f4d3 HS |
274 | return; |
275 | ||
276 | if (atomic_dec_and_test(&ei->i_fc_updates)) | |
277 | wake_up_all(&ei->i_fc_wait); | |
278 | } | |
279 | ||
280 | /* | |
281 | * Remove inode from fast commit list. If the inode is being committed | |
282 | * we wait until inode commit is done. | |
283 | */ | |
284 | void ext4_fc_del(struct inode *inode) | |
285 | { | |
286 | struct ext4_inode_info *ei = EXT4_I(inode); | |
b3998b3b RH |
287 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
288 | struct ext4_fc_dentry_update *fc_dentry; | |
aa75f4d3 | 289 | |
b7b80a35 | 290 | if (ext4_fc_disabled(inode->i_sb)) |
aa75f4d3 HS |
291 | return; |
292 | ||
293 | restart: | |
294 | spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); | |
b3998b3b | 295 | if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { |
aa75f4d3 HS |
296 | spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); |
297 | return; | |
298 | } | |
299 | ||
300 | if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { | |
f6634e26 | 301 | ext4_fc_wait_committing_inode(inode); |
aa75f4d3 HS |
302 | goto restart; |
303 | } | |
b3998b3b RH |
304 | |
305 | if (!list_empty(&ei->i_fc_list)) | |
306 | list_del_init(&ei->i_fc_list); | |
307 | ||
308 | /* | |
309 | * Since this inode is getting removed, let's also remove all FC | |
310 | * dentry create references, since it is not needed to log it anyways. | |
311 | */ | |
312 | if (list_empty(&ei->i_fc_dilist)) { | |
313 | spin_unlock(&sbi->s_fc_lock); | |
314 | return; | |
315 | } | |
316 | ||
317 | fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); | |
318 | WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); | |
319 | list_del_init(&fc_dentry->fcd_list); | |
320 | list_del_init(&fc_dentry->fcd_dilist); | |
321 | ||
322 | WARN_ON(!list_empty(&ei->i_fc_dilist)); | |
323 | spin_unlock(&sbi->s_fc_lock); | |
324 | ||
325 | if (fc_dentry->fcd_name.name && | |
326 | fc_dentry->fcd_name.len > DNAME_INLINE_LEN) | |
327 | kfree(fc_dentry->fcd_name.name); | |
328 | kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); | |
329 | ||
330 | return; | |
aa75f4d3 HS |
331 | } |
332 | ||
333 | /* | |
e85c81ba XY |
334 | * Mark file system as fast commit ineligible, and record latest |
335 | * ineligible transaction tid. This means until the recorded | |
336 | * transaction, commit operation would result in a full jbd2 commit. | |
aa75f4d3 | 337 | */ |
e85c81ba | 338 | void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) |
aa75f4d3 HS |
339 | { |
340 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
e85c81ba | 341 | tid_t tid; |
aa75f4d3 | 342 | |
b7b80a35 | 343 | if (ext4_fc_disabled(sb)) |
8016e29f HS |
344 | return; |
345 | ||
9b5f6c9b | 346 | ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); |
e85c81ba XY |
347 | if (handle && !IS_ERR(handle)) |
348 | tid = handle->h_transaction->t_tid; | |
349 | else { | |
350 | read_lock(&sbi->s_journal->j_state_lock); | |
351 | tid = sbi->s_journal->j_running_transaction ? | |
352 | sbi->s_journal->j_running_transaction->t_tid : 0; | |
353 | read_unlock(&sbi->s_journal->j_state_lock); | |
354 | } | |
355 | spin_lock(&sbi->s_fc_lock); | |
356 | if (sbi->s_fc_ineligible_tid < tid) | |
357 | sbi->s_fc_ineligible_tid = tid; | |
358 | spin_unlock(&sbi->s_fc_lock); | |
aa75f4d3 HS |
359 | WARN_ON(reason >= EXT4_FC_REASON_MAX); |
360 | sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; | |
361 | } | |
362 | ||
aa75f4d3 HS |
363 | /* |
364 | * Generic fast commit tracking function. If this is the first time this we are | |
365 | * called after a full commit, we initialize fast commit fields and then call | |
366 | * __fc_track_fn() with update = 0. If we have already been called after a full | |
367 | * commit, we pass update = 1. Based on that, the track function can determine | |
368 | * if it needs to track a field for the first time or if it needs to just | |
369 | * update the previously tracked value. | |
370 | * | |
371 | * If enqueue is set, this function enqueues the inode in fast commit list. | |
372 | */ | |
373 | static int ext4_fc_track_template( | |
a80f7fcf HS |
374 | handle_t *handle, struct inode *inode, |
375 | int (*__fc_track_fn)(struct inode *, void *, bool), | |
aa75f4d3 HS |
376 | void *args, int enqueue) |
377 | { | |
aa75f4d3 HS |
378 | bool update = false; |
379 | struct ext4_inode_info *ei = EXT4_I(inode); | |
380 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | |
a80f7fcf | 381 | tid_t tid = 0; |
aa75f4d3 HS |
382 | int ret; |
383 | ||
a80f7fcf | 384 | tid = handle->h_transaction->t_tid; |
aa75f4d3 | 385 | mutex_lock(&ei->i_fc_lock); |
a80f7fcf | 386 | if (tid == ei->i_sync_tid) { |
aa75f4d3 HS |
387 | update = true; |
388 | } else { | |
389 | ext4_fc_reset_inode(inode); | |
a80f7fcf | 390 | ei->i_sync_tid = tid; |
aa75f4d3 HS |
391 | } |
392 | ret = __fc_track_fn(inode, args, update); | |
393 | mutex_unlock(&ei->i_fc_lock); | |
394 | ||
395 | if (!enqueue) | |
396 | return ret; | |
397 | ||
398 | spin_lock(&sbi->s_fc_lock); | |
399 | if (list_empty(&EXT4_I(inode)->i_fc_list)) | |
400 | list_add_tail(&EXT4_I(inode)->i_fc_list, | |
bdc8a53a XY |
401 | (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || |
402 | sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? | |
aa75f4d3 HS |
403 | &sbi->s_fc_q[FC_Q_STAGING] : |
404 | &sbi->s_fc_q[FC_Q_MAIN]); | |
405 | spin_unlock(&sbi->s_fc_lock); | |
406 | ||
407 | return ret; | |
408 | } | |
409 | ||
410 | struct __track_dentry_update_args { | |
411 | struct dentry *dentry; | |
412 | int op; | |
413 | }; | |
414 | ||
415 | /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ | |
416 | static int __track_dentry_update(struct inode *inode, void *arg, bool update) | |
417 | { | |
418 | struct ext4_fc_dentry_update *node; | |
419 | struct ext4_inode_info *ei = EXT4_I(inode); | |
420 | struct __track_dentry_update_args *dentry_update = | |
421 | (struct __track_dentry_update_args *)arg; | |
422 | struct dentry *dentry = dentry_update->dentry; | |
0fbcb525 EB |
423 | struct inode *dir = dentry->d_parent->d_inode; |
424 | struct super_block *sb = inode->i_sb; | |
425 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
aa75f4d3 HS |
426 | |
427 | mutex_unlock(&ei->i_fc_lock); | |
0fbcb525 EB |
428 | |
429 | if (IS_ENCRYPTED(dir)) { | |
430 | ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, | |
431 | NULL); | |
432 | mutex_lock(&ei->i_fc_lock); | |
433 | return -EOPNOTSUPP; | |
434 | } | |
435 | ||
aa75f4d3 HS |
436 | node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); |
437 | if (!node) { | |
0fbcb525 | 438 | ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); |
aa75f4d3 HS |
439 | mutex_lock(&ei->i_fc_lock); |
440 | return -ENOMEM; | |
441 | } | |
442 | ||
443 | node->fcd_op = dentry_update->op; | |
0fbcb525 | 444 | node->fcd_parent = dir->i_ino; |
aa75f4d3 HS |
445 | node->fcd_ino = inode->i_ino; |
446 | if (dentry->d_name.len > DNAME_INLINE_LEN) { | |
447 | node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); | |
448 | if (!node->fcd_name.name) { | |
449 | kmem_cache_free(ext4_fc_dentry_cachep, node); | |
0fbcb525 | 450 | ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); |
aa75f4d3 HS |
451 | mutex_lock(&ei->i_fc_lock); |
452 | return -ENOMEM; | |
453 | } | |
454 | memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, | |
455 | dentry->d_name.len); | |
456 | } else { | |
457 | memcpy(node->fcd_iname, dentry->d_name.name, | |
458 | dentry->d_name.len); | |
459 | node->fcd_name.name = node->fcd_iname; | |
460 | } | |
461 | node->fcd_name.len = dentry->d_name.len; | |
b3998b3b | 462 | INIT_LIST_HEAD(&node->fcd_dilist); |
aa75f4d3 | 463 | spin_lock(&sbi->s_fc_lock); |
bdc8a53a XY |
464 | if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || |
465 | sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) | |
aa75f4d3 HS |
466 | list_add_tail(&node->fcd_list, |
467 | &sbi->s_fc_dentry_q[FC_Q_STAGING]); | |
468 | else | |
469 | list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); | |
b3998b3b RH |
470 | |
471 | /* | |
472 | * This helps us keep a track of all fc_dentry updates which is part of | |
473 | * this ext4 inode. So in case the inode is getting unlinked, before | |
474 | * even we get a chance to fsync, we could remove all fc_dentry | |
475 | * references while evicting the inode in ext4_fc_del(). | |
476 | * Also with this, we don't need to loop over all the inodes in | |
477 | * sbi->s_fc_q to get the corresponding inode in | |
478 | * ext4_fc_commit_dentry_updates(). | |
479 | */ | |
480 | if (dentry_update->op == EXT4_FC_TAG_CREAT) { | |
481 | WARN_ON(!list_empty(&ei->i_fc_dilist)); | |
482 | list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); | |
483 | } | |
aa75f4d3 HS |
484 | spin_unlock(&sbi->s_fc_lock); |
485 | mutex_lock(&ei->i_fc_lock); | |
486 | ||
487 | return 0; | |
488 | } | |
489 | ||
a80f7fcf HS |
490 | void __ext4_fc_track_unlink(handle_t *handle, |
491 | struct inode *inode, struct dentry *dentry) | |
aa75f4d3 HS |
492 | { |
493 | struct __track_dentry_update_args args; | |
494 | int ret; | |
495 | ||
496 | args.dentry = dentry; | |
497 | args.op = EXT4_FC_TAG_UNLINK; | |
498 | ||
a80f7fcf | 499 | ret = ext4_fc_track_template(handle, inode, __track_dentry_update, |
aa75f4d3 | 500 | (void *)&args, 0); |
1d2e2440 | 501 | trace_ext4_fc_track_unlink(handle, inode, dentry, ret); |
aa75f4d3 HS |
502 | } |
503 | ||
a80f7fcf HS |
504 | void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) |
505 | { | |
78be0471 | 506 | struct inode *inode = d_inode(dentry); |
78be0471 | 507 | |
b7b80a35 | 508 | if (ext4_fc_disabled(inode->i_sb)) |
78be0471 RH |
509 | return; |
510 | ||
511 | if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) | |
512 | return; | |
513 | ||
514 | __ext4_fc_track_unlink(handle, inode, dentry); | |
a80f7fcf HS |
515 | } |
516 | ||
517 | void __ext4_fc_track_link(handle_t *handle, | |
518 | struct inode *inode, struct dentry *dentry) | |
aa75f4d3 HS |
519 | { |
520 | struct __track_dentry_update_args args; | |
521 | int ret; | |
522 | ||
523 | args.dentry = dentry; | |
524 | args.op = EXT4_FC_TAG_LINK; | |
525 | ||
a80f7fcf | 526 | ret = ext4_fc_track_template(handle, inode, __track_dentry_update, |
aa75f4d3 | 527 | (void *)&args, 0); |
1d2e2440 | 528 | trace_ext4_fc_track_link(handle, inode, dentry, ret); |
aa75f4d3 HS |
529 | } |
530 | ||
a80f7fcf HS |
531 | void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) |
532 | { | |
78be0471 | 533 | struct inode *inode = d_inode(dentry); |
78be0471 | 534 | |
b7b80a35 | 535 | if (ext4_fc_disabled(inode->i_sb)) |
78be0471 RH |
536 | return; |
537 | ||
538 | if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) | |
539 | return; | |
540 | ||
541 | __ext4_fc_track_link(handle, inode, dentry); | |
a80f7fcf HS |
542 | } |
543 | ||
8210bb29 HS |
544 | void __ext4_fc_track_create(handle_t *handle, struct inode *inode, |
545 | struct dentry *dentry) | |
aa75f4d3 HS |
546 | { |
547 | struct __track_dentry_update_args args; | |
548 | int ret; | |
549 | ||
550 | args.dentry = dentry; | |
551 | args.op = EXT4_FC_TAG_CREAT; | |
552 | ||
a80f7fcf | 553 | ret = ext4_fc_track_template(handle, inode, __track_dentry_update, |
aa75f4d3 | 554 | (void *)&args, 0); |
1d2e2440 | 555 | trace_ext4_fc_track_create(handle, inode, dentry, ret); |
aa75f4d3 HS |
556 | } |
557 | ||
8210bb29 HS |
558 | void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) |
559 | { | |
78be0471 | 560 | struct inode *inode = d_inode(dentry); |
78be0471 | 561 | |
b7b80a35 | 562 | if (ext4_fc_disabled(inode->i_sb)) |
78be0471 RH |
563 | return; |
564 | ||
565 | if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) | |
566 | return; | |
567 | ||
568 | __ext4_fc_track_create(handle, inode, dentry); | |
8210bb29 HS |
569 | } |
570 | ||
aa75f4d3 HS |
571 | /* __track_fn for inode tracking */ |
572 | static int __track_inode(struct inode *inode, void *arg, bool update) | |
573 | { | |
574 | if (update) | |
575 | return -EEXIST; | |
576 | ||
577 | EXT4_I(inode)->i_fc_lblk_len = 0; | |
578 | ||
579 | return 0; | |
580 | } | |
581 | ||
a80f7fcf | 582 | void ext4_fc_track_inode(handle_t *handle, struct inode *inode) |
aa75f4d3 HS |
583 | { |
584 | int ret; | |
585 | ||
586 | if (S_ISDIR(inode->i_mode)) | |
587 | return; | |
588 | ||
e64e6ca9 YB |
589 | if (ext4_fc_disabled(inode->i_sb)) |
590 | return; | |
591 | ||
556e0319 HS |
592 | if (ext4_should_journal_data(inode)) { |
593 | ext4_fc_mark_ineligible(inode->i_sb, | |
e85c81ba | 594 | EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); |
556e0319 HS |
595 | return; |
596 | } | |
597 | ||
78be0471 RH |
598 | if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) |
599 | return; | |
600 | ||
a80f7fcf | 601 | ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); |
1d2e2440 | 602 | trace_ext4_fc_track_inode(handle, inode, ret); |
aa75f4d3 HS |
603 | } |
604 | ||
605 | struct __track_range_args { | |
606 | ext4_lblk_t start, end; | |
607 | }; | |
608 | ||
609 | /* __track_fn for tracking data updates */ | |
610 | static int __track_range(struct inode *inode, void *arg, bool update) | |
611 | { | |
612 | struct ext4_inode_info *ei = EXT4_I(inode); | |
613 | ext4_lblk_t oldstart; | |
614 | struct __track_range_args *__arg = | |
615 | (struct __track_range_args *)arg; | |
616 | ||
617 | if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { | |
618 | ext4_debug("Special inode %ld being modified\n", inode->i_ino); | |
619 | return -ECANCELED; | |
620 | } | |
621 | ||
622 | oldstart = ei->i_fc_lblk_start; | |
623 | ||
624 | if (update && ei->i_fc_lblk_len > 0) { | |
625 | ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); | |
626 | ei->i_fc_lblk_len = | |
627 | max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - | |
628 | ei->i_fc_lblk_start + 1; | |
629 | } else { | |
630 | ei->i_fc_lblk_start = __arg->start; | |
631 | ei->i_fc_lblk_len = __arg->end - __arg->start + 1; | |
632 | } | |
633 | ||
634 | return 0; | |
635 | } | |
636 | ||
a80f7fcf | 637 | void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, |
aa75f4d3 HS |
638 | ext4_lblk_t end) |
639 | { | |
640 | struct __track_range_args args; | |
641 | int ret; | |
642 | ||
643 | if (S_ISDIR(inode->i_mode)) | |
644 | return; | |
645 | ||
b7b80a35 | 646 | if (ext4_fc_disabled(inode->i_sb)) |
78be0471 RH |
647 | return; |
648 | ||
649 | if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) | |
650 | return; | |
651 | ||
aa75f4d3 HS |
652 | args.start = start; |
653 | args.end = end; | |
654 | ||
a80f7fcf | 655 | ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); |
aa75f4d3 | 656 | |
1d2e2440 | 657 | trace_ext4_fc_track_range(handle, inode, start, end, ret); |
aa75f4d3 HS |
658 | } |
659 | ||
e9f53353 | 660 | static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) |
aa75f4d3 | 661 | { |
67c0f556 | 662 | blk_opf_t write_flags = REQ_SYNC; |
aa75f4d3 HS |
663 | struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; |
664 | ||
e9f53353 DP |
665 | /* Add REQ_FUA | REQ_PREFLUSH only its tail */ |
666 | if (test_opt(sb, BARRIER) && is_tail) | |
aa75f4d3 HS |
667 | write_flags |= REQ_FUA | REQ_PREFLUSH; |
668 | lock_buffer(bh); | |
764b3fd3 | 669 | set_buffer_dirty(bh); |
aa75f4d3 HS |
670 | set_buffer_uptodate(bh); |
671 | bh->b_end_io = ext4_end_buffer_io_sync; | |
1420c4a5 | 672 | submit_bh(REQ_OP_WRITE | write_flags, bh); |
aa75f4d3 HS |
673 | EXT4_SB(sb)->s_fc_bh = NULL; |
674 | } | |
675 | ||
676 | /* Ext4 commit path routines */ | |
677 | ||
aa75f4d3 HS |
678 | /* |
679 | * Allocate len bytes on a fast commit buffer. | |
680 | * | |
681 | * During the commit time this function is used to manage fast commit | |
682 | * block space. We don't split a fast commit log onto different | |
683 | * blocks. So this function makes sure that if there's not enough space | |
684 | * on the current block, the remaining space in the current block is | |
685 | * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, | |
686 | * new block is from jbd2 and CRC is updated to reflect the padding | |
687 | * we added. | |
688 | */ | |
689 | static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) | |
690 | { | |
8415ce07 | 691 | struct ext4_fc_tl tl; |
aa75f4d3 HS |
692 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
693 | struct buffer_head *bh; | |
694 | int bsize = sbi->s_journal->j_blocksize; | |
695 | int ret, off = sbi->s_fc_bytes % bsize; | |
48a6a66d | 696 | int remaining; |
8415ce07 | 697 | u8 *dst; |
aa75f4d3 HS |
698 | |
699 | /* | |
48a6a66d EB |
700 | * If 'len' is too long to fit in any block alongside a PAD tlv, then we |
701 | * cannot fulfill the request. | |
aa75f4d3 | 702 | */ |
48a6a66d | 703 | if (len > bsize - EXT4_FC_TAG_BASE_LEN) |
aa75f4d3 HS |
704 | return NULL; |
705 | ||
48a6a66d EB |
706 | if (!sbi->s_fc_bh) { |
707 | ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); | |
708 | if (ret) | |
709 | return NULL; | |
710 | sbi->s_fc_bh = bh; | |
aa75f4d3 | 711 | } |
8415ce07 | 712 | dst = sbi->s_fc_bh->b_data + off; |
48a6a66d EB |
713 | |
714 | /* | |
715 | * Allocate the bytes in the current block if we can do so while still | |
716 | * leaving enough space for a PAD tlv. | |
717 | */ | |
718 | remaining = bsize - EXT4_FC_TAG_BASE_LEN - off; | |
719 | if (len <= remaining) { | |
720 | sbi->s_fc_bytes += len; | |
721 | return dst; | |
722 | } | |
723 | ||
724 | /* | |
725 | * Else, terminate the current block with a PAD tlv, then allocate a new | |
726 | * block and allocate the bytes at the start of that new block. | |
727 | */ | |
728 | ||
8415ce07 | 729 | tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); |
48a6a66d | 730 | tl.fc_len = cpu_to_le16(remaining); |
8805dbcb EB |
731 | memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); |
732 | memset(dst + EXT4_FC_TAG_BASE_LEN, 0, remaining); | |
733 | *crc = ext4_chksum(sbi, *crc, sbi->s_fc_bh->b_data, bsize); | |
594bc43b | 734 | |
e9f53353 | 735 | ext4_fc_submit_bh(sb, false); |
aa75f4d3 HS |
736 | |
737 | ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); | |
738 | if (ret) | |
739 | return NULL; | |
740 | sbi->s_fc_bh = bh; | |
48a6a66d | 741 | sbi->s_fc_bytes += bsize - off + len; |
aa75f4d3 HS |
742 | return sbi->s_fc_bh->b_data; |
743 | } | |
744 | ||
aa75f4d3 HS |
745 | /* |
746 | * Complete a fast commit by writing tail tag. | |
747 | * | |
748 | * Writing tail tag marks the end of a fast commit. In order to guarantee | |
749 | * atomicity, after writing tail tag, even if there's space remaining | |
750 | * in the block, next commit shouldn't use it. That's why tail tag | |
751 | * has the length as that of the remaining space on the block. | |
752 | */ | |
753 | static int ext4_fc_write_tail(struct super_block *sb, u32 crc) | |
754 | { | |
755 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
756 | struct ext4_fc_tl tl; | |
757 | struct ext4_fc_tail tail; | |
758 | int off, bsize = sbi->s_journal->j_blocksize; | |
759 | u8 *dst; | |
760 | ||
761 | /* | |
762 | * ext4_fc_reserve_space takes care of allocating an extra block if | |
763 | * there's no enough space on this block for accommodating this tail. | |
764 | */ | |
fdc2a3c7 | 765 | dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); |
aa75f4d3 HS |
766 | if (!dst) |
767 | return -ENOSPC; | |
768 | ||
769 | off = sbi->s_fc_bytes % bsize; | |
770 | ||
771 | tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); | |
48a6a66d | 772 | tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail)); |
aa75f4d3 HS |
773 | sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); |
774 | ||
8805dbcb | 775 | memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); |
fdc2a3c7 | 776 | dst += EXT4_FC_TAG_BASE_LEN; |
aa75f4d3 | 777 | tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); |
8805dbcb | 778 | memcpy(dst, &tail.fc_tid, sizeof(tail.fc_tid)); |
aa75f4d3 | 779 | dst += sizeof(tail.fc_tid); |
8805dbcb EB |
780 | crc = ext4_chksum(sbi, crc, sbi->s_fc_bh->b_data, |
781 | dst - (u8 *)sbi->s_fc_bh->b_data); | |
aa75f4d3 | 782 | tail.fc_crc = cpu_to_le32(crc); |
8805dbcb | 783 | memcpy(dst, &tail.fc_crc, sizeof(tail.fc_crc)); |
594bc43b EB |
784 | dst += sizeof(tail.fc_crc); |
785 | memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */ | |
aa75f4d3 | 786 | |
e9f53353 | 787 | ext4_fc_submit_bh(sb, true); |
aa75f4d3 HS |
788 | |
789 | return 0; | |
790 | } | |
791 | ||
792 | /* | |
793 | * Adds tag, length, value and updates CRC. Returns true if tlv was added. | |
794 | * Returns false if there's not enough space. | |
795 | */ | |
796 | static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, | |
797 | u32 *crc) | |
798 | { | |
799 | struct ext4_fc_tl tl; | |
800 | u8 *dst; | |
801 | ||
fdc2a3c7 | 802 | dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); |
aa75f4d3 HS |
803 | if (!dst) |
804 | return false; | |
805 | ||
806 | tl.fc_tag = cpu_to_le16(tag); | |
807 | tl.fc_len = cpu_to_le16(len); | |
808 | ||
8805dbcb EB |
809 | memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); |
810 | memcpy(dst + EXT4_FC_TAG_BASE_LEN, val, len); | |
aa75f4d3 HS |
811 | |
812 | return true; | |
813 | } | |
814 | ||
815 | /* Same as above, but adds dentry tlv. */ | |
facec450 GJ |
816 | static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, |
817 | struct ext4_fc_dentry_update *fc_dentry) | |
aa75f4d3 HS |
818 | { |
819 | struct ext4_fc_dentry_info fcd; | |
820 | struct ext4_fc_tl tl; | |
facec450 | 821 | int dlen = fc_dentry->fcd_name.len; |
fdc2a3c7 YB |
822 | u8 *dst = ext4_fc_reserve_space(sb, |
823 | EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); | |
aa75f4d3 HS |
824 | |
825 | if (!dst) | |
826 | return false; | |
827 | ||
facec450 GJ |
828 | fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); |
829 | fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); | |
830 | tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); | |
aa75f4d3 | 831 | tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); |
8805dbcb | 832 | memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); |
fdc2a3c7 | 833 | dst += EXT4_FC_TAG_BASE_LEN; |
8805dbcb | 834 | memcpy(dst, &fcd, sizeof(fcd)); |
aa75f4d3 | 835 | dst += sizeof(fcd); |
8805dbcb | 836 | memcpy(dst, fc_dentry->fcd_name.name, dlen); |
aa75f4d3 HS |
837 | |
838 | return true; | |
839 | } | |
840 | ||
841 | /* | |
842 | * Writes inode in the fast commit space under TLV with tag @tag. | |
843 | * Returns 0 on success, error on failure. | |
844 | */ | |
845 | static int ext4_fc_write_inode(struct inode *inode, u32 *crc) | |
846 | { | |
847 | struct ext4_inode_info *ei = EXT4_I(inode); | |
848 | int inode_len = EXT4_GOOD_OLD_INODE_SIZE; | |
849 | int ret; | |
850 | struct ext4_iloc iloc; | |
851 | struct ext4_fc_inode fc_inode; | |
852 | struct ext4_fc_tl tl; | |
853 | u8 *dst; | |
854 | ||
855 | ret = ext4_get_inode_loc(inode, &iloc); | |
856 | if (ret) | |
857 | return ret; | |
858 | ||
6c31a689 HS |
859 | if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) |
860 | inode_len = EXT4_INODE_SIZE(inode->i_sb); | |
861 | else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) | |
aa75f4d3 HS |
862 | inode_len += ei->i_extra_isize; |
863 | ||
864 | fc_inode.fc_ino = cpu_to_le32(inode->i_ino); | |
865 | tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); | |
866 | tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); | |
867 | ||
ccbf8eeb | 868 | ret = -ECANCELED; |
aa75f4d3 | 869 | dst = ext4_fc_reserve_space(inode->i_sb, |
fdc2a3c7 | 870 | EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); |
aa75f4d3 | 871 | if (!dst) |
ccbf8eeb | 872 | goto err; |
aa75f4d3 | 873 | |
8805dbcb | 874 | memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); |
fdc2a3c7 | 875 | dst += EXT4_FC_TAG_BASE_LEN; |
8805dbcb | 876 | memcpy(dst, &fc_inode, sizeof(fc_inode)); |
aa75f4d3 | 877 | dst += sizeof(fc_inode); |
8805dbcb | 878 | memcpy(dst, (u8 *)ext4_raw_inode(&iloc), inode_len); |
ccbf8eeb YB |
879 | ret = 0; |
880 | err: | |
881 | brelse(iloc.bh); | |
882 | return ret; | |
aa75f4d3 HS |
883 | } |
884 | ||
885 | /* | |
886 | * Writes updated data ranges for the inode in question. Updates CRC. | |
887 | * Returns 0 on success, error otherwise. | |
888 | */ | |
889 | static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) | |
890 | { | |
891 | ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; | |
892 | struct ext4_inode_info *ei = EXT4_I(inode); | |
893 | struct ext4_map_blocks map; | |
894 | struct ext4_fc_add_range fc_ext; | |
895 | struct ext4_fc_del_range lrange; | |
896 | struct ext4_extent *ex; | |
897 | int ret; | |
898 | ||
899 | mutex_lock(&ei->i_fc_lock); | |
900 | if (ei->i_fc_lblk_len == 0) { | |
901 | mutex_unlock(&ei->i_fc_lock); | |
902 | return 0; | |
903 | } | |
904 | old_blk_size = ei->i_fc_lblk_start; | |
905 | new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; | |
906 | ei->i_fc_lblk_len = 0; | |
907 | mutex_unlock(&ei->i_fc_lock); | |
908 | ||
909 | cur_lblk_off = old_blk_size; | |
4978c659 JK |
910 | ext4_debug("will try writing %d to %d for inode %ld\n", |
911 | cur_lblk_off, new_blk_size, inode->i_ino); | |
aa75f4d3 HS |
912 | |
913 | while (cur_lblk_off <= new_blk_size) { | |
914 | map.m_lblk = cur_lblk_off; | |
915 | map.m_len = new_blk_size - cur_lblk_off + 1; | |
916 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
917 | if (ret < 0) | |
918 | return -ECANCELED; | |
919 | ||
920 | if (map.m_len == 0) { | |
921 | cur_lblk_off++; | |
922 | continue; | |
923 | } | |
924 | ||
925 | if (ret == 0) { | |
926 | lrange.fc_ino = cpu_to_le32(inode->i_ino); | |
927 | lrange.fc_lblk = cpu_to_le32(map.m_lblk); | |
928 | lrange.fc_len = cpu_to_le32(map.m_len); | |
929 | if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, | |
930 | sizeof(lrange), (u8 *)&lrange, crc)) | |
931 | return -ENOSPC; | |
932 | } else { | |
a2c2f082 HT |
933 | unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? |
934 | EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; | |
935 | ||
936 | /* Limit the number of blocks in one extent */ | |
937 | map.m_len = min(max, map.m_len); | |
938 | ||
aa75f4d3 HS |
939 | fc_ext.fc_ino = cpu_to_le32(inode->i_ino); |
940 | ex = (struct ext4_extent *)&fc_ext.fc_ex; | |
941 | ex->ee_block = cpu_to_le32(map.m_lblk); | |
942 | ex->ee_len = cpu_to_le16(map.m_len); | |
943 | ext4_ext_store_pblock(ex, map.m_pblk); | |
944 | if (map.m_flags & EXT4_MAP_UNWRITTEN) | |
945 | ext4_ext_mark_unwritten(ex); | |
946 | else | |
947 | ext4_ext_mark_initialized(ex); | |
948 | if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, | |
949 | sizeof(fc_ext), (u8 *)&fc_ext, crc)) | |
950 | return -ENOSPC; | |
951 | } | |
952 | ||
953 | cur_lblk_off += map.m_len; | |
954 | } | |
955 | ||
956 | return 0; | |
957 | } | |
958 | ||
959 | ||
960 | /* Submit data for all the fast commit inodes */ | |
961 | static int ext4_fc_submit_inode_data_all(journal_t *journal) | |
962 | { | |
c30365b9 | 963 | struct super_block *sb = journal->j_private; |
aa75f4d3 HS |
964 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
965 | struct ext4_inode_info *ei; | |
aa75f4d3 HS |
966 | int ret = 0; |
967 | ||
968 | spin_lock(&sbi->s_fc_lock); | |
96e7c02d | 969 | list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { |
aa75f4d3 HS |
970 | ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); |
971 | while (atomic_read(&ei->i_fc_updates)) { | |
972 | DEFINE_WAIT(wait); | |
973 | ||
974 | prepare_to_wait(&ei->i_fc_wait, &wait, | |
975 | TASK_UNINTERRUPTIBLE); | |
976 | if (atomic_read(&ei->i_fc_updates)) { | |
977 | spin_unlock(&sbi->s_fc_lock); | |
978 | schedule(); | |
979 | spin_lock(&sbi->s_fc_lock); | |
980 | } | |
981 | finish_wait(&ei->i_fc_wait, &wait); | |
982 | } | |
983 | spin_unlock(&sbi->s_fc_lock); | |
f30ff35f | 984 | ret = jbd2_submit_inode_data(journal, ei->jinode); |
aa75f4d3 HS |
985 | if (ret) |
986 | return ret; | |
987 | spin_lock(&sbi->s_fc_lock); | |
988 | } | |
989 | spin_unlock(&sbi->s_fc_lock); | |
990 | ||
991 | return ret; | |
992 | } | |
993 | ||
994 | /* Wait for completion of data for all the fast commit inodes */ | |
995 | static int ext4_fc_wait_inode_data_all(journal_t *journal) | |
996 | { | |
c30365b9 | 997 | struct super_block *sb = journal->j_private; |
aa75f4d3 HS |
998 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
999 | struct ext4_inode_info *pos, *n; | |
1000 | int ret = 0; | |
1001 | ||
1002 | spin_lock(&sbi->s_fc_lock); | |
1003 | list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { | |
1004 | if (!ext4_test_inode_state(&pos->vfs_inode, | |
1005 | EXT4_STATE_FC_COMMITTING)) | |
1006 | continue; | |
1007 | spin_unlock(&sbi->s_fc_lock); | |
1008 | ||
1009 | ret = jbd2_wait_inode_data(journal, pos->jinode); | |
1010 | if (ret) | |
1011 | return ret; | |
1012 | spin_lock(&sbi->s_fc_lock); | |
1013 | } | |
1014 | spin_unlock(&sbi->s_fc_lock); | |
1015 | ||
1016 | return 0; | |
1017 | } | |
1018 | ||
1019 | /* Commit all the directory entry updates */ | |
1020 | static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) | |
fa329e27 TT |
1021 | __acquires(&sbi->s_fc_lock) |
1022 | __releases(&sbi->s_fc_lock) | |
aa75f4d3 | 1023 | { |
c30365b9 | 1024 | struct super_block *sb = journal->j_private; |
aa75f4d3 | 1025 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
96e7c02d | 1026 | struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; |
aa75f4d3 | 1027 | struct inode *inode; |
b3998b3b | 1028 | struct ext4_inode_info *ei; |
aa75f4d3 HS |
1029 | int ret; |
1030 | ||
1031 | if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) | |
1032 | return 0; | |
96e7c02d DP |
1033 | list_for_each_entry_safe(fc_dentry, fc_dentry_n, |
1034 | &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { | |
aa75f4d3 HS |
1035 | if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { |
1036 | spin_unlock(&sbi->s_fc_lock); | |
facec450 | 1037 | if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { |
aa75f4d3 HS |
1038 | ret = -ENOSPC; |
1039 | goto lock_and_exit; | |
1040 | } | |
1041 | spin_lock(&sbi->s_fc_lock); | |
1042 | continue; | |
1043 | } | |
aa75f4d3 | 1044 | /* |
b3998b3b RH |
1045 | * With fcd_dilist we need not loop in sbi->s_fc_q to get the |
1046 | * corresponding inode pointer | |
aa75f4d3 | 1047 | */ |
b3998b3b RH |
1048 | WARN_ON(list_empty(&fc_dentry->fcd_dilist)); |
1049 | ei = list_first_entry(&fc_dentry->fcd_dilist, | |
1050 | struct ext4_inode_info, i_fc_dilist); | |
1051 | inode = &ei->vfs_inode; | |
1052 | WARN_ON(inode->i_ino != fc_dentry->fcd_ino); | |
1053 | ||
aa75f4d3 HS |
1054 | spin_unlock(&sbi->s_fc_lock); |
1055 | ||
1056 | /* | |
1057 | * We first write the inode and then the create dirent. This | |
1058 | * allows the recovery code to create an unnamed inode first | |
1059 | * and then link it to a directory entry. This allows us | |
1060 | * to use namei.c routines almost as is and simplifies | |
1061 | * the recovery code. | |
1062 | */ | |
1063 | ret = ext4_fc_write_inode(inode, crc); | |
1064 | if (ret) | |
1065 | goto lock_and_exit; | |
1066 | ||
1067 | ret = ext4_fc_write_inode_data(inode, crc); | |
1068 | if (ret) | |
1069 | goto lock_and_exit; | |
1070 | ||
facec450 | 1071 | if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { |
aa75f4d3 HS |
1072 | ret = -ENOSPC; |
1073 | goto lock_and_exit; | |
1074 | } | |
1075 | ||
1076 | spin_lock(&sbi->s_fc_lock); | |
1077 | } | |
1078 | return 0; | |
1079 | lock_and_exit: | |
1080 | spin_lock(&sbi->s_fc_lock); | |
1081 | return ret; | |
1082 | } | |
1083 | ||
1084 | static int ext4_fc_perform_commit(journal_t *journal) | |
1085 | { | |
c30365b9 | 1086 | struct super_block *sb = journal->j_private; |
aa75f4d3 HS |
1087 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1088 | struct ext4_inode_info *iter; | |
1089 | struct ext4_fc_head head; | |
aa75f4d3 HS |
1090 | struct inode *inode; |
1091 | struct blk_plug plug; | |
1092 | int ret = 0; | |
1093 | u32 crc = 0; | |
1094 | ||
1095 | ret = ext4_fc_submit_inode_data_all(journal); | |
1096 | if (ret) | |
1097 | return ret; | |
1098 | ||
1099 | ret = ext4_fc_wait_inode_data_all(journal); | |
1100 | if (ret) | |
1101 | return ret; | |
1102 | ||
da0c5d26 HS |
1103 | /* |
1104 | * If file system device is different from journal device, issue a cache | |
1105 | * flush before we start writing fast commit blocks. | |
1106 | */ | |
1107 | if (journal->j_fs_dev != journal->j_dev) | |
c6bf3f0e | 1108 | blkdev_issue_flush(journal->j_fs_dev); |
da0c5d26 | 1109 | |
aa75f4d3 HS |
1110 | blk_start_plug(&plug); |
1111 | if (sbi->s_fc_bytes == 0) { | |
1112 | /* | |
1113 | * Add a head tag only if this is the first fast commit | |
1114 | * in this TID. | |
1115 | */ | |
1116 | head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); | |
1117 | head.fc_tid = cpu_to_le32( | |
1118 | sbi->s_journal->j_running_transaction->t_tid); | |
1119 | if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), | |
e1262cd2 XY |
1120 | (u8 *)&head, &crc)) { |
1121 | ret = -ENOSPC; | |
aa75f4d3 | 1122 | goto out; |
e1262cd2 | 1123 | } |
aa75f4d3 HS |
1124 | } |
1125 | ||
1126 | spin_lock(&sbi->s_fc_lock); | |
1127 | ret = ext4_fc_commit_dentry_updates(journal, &crc); | |
1128 | if (ret) { | |
1129 | spin_unlock(&sbi->s_fc_lock); | |
1130 | goto out; | |
1131 | } | |
1132 | ||
96e7c02d | 1133 | list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { |
aa75f4d3 HS |
1134 | inode = &iter->vfs_inode; |
1135 | if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) | |
1136 | continue; | |
1137 | ||
1138 | spin_unlock(&sbi->s_fc_lock); | |
1139 | ret = ext4_fc_write_inode_data(inode, &crc); | |
1140 | if (ret) | |
1141 | goto out; | |
1142 | ret = ext4_fc_write_inode(inode, &crc); | |
1143 | if (ret) | |
1144 | goto out; | |
1145 | spin_lock(&sbi->s_fc_lock); | |
aa75f4d3 HS |
1146 | } |
1147 | spin_unlock(&sbi->s_fc_lock); | |
1148 | ||
1149 | ret = ext4_fc_write_tail(sb, crc); | |
1150 | ||
1151 | out: | |
1152 | blk_finish_plug(&plug); | |
1153 | return ret; | |
1154 | } | |
1155 | ||
0915e464 | 1156 | static void ext4_fc_update_stats(struct super_block *sb, int status, |
d9bf099c | 1157 | u64 commit_time, int nblks, tid_t commit_tid) |
0915e464 HS |
1158 | { |
1159 | struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; | |
1160 | ||
4978c659 | 1161 | ext4_debug("Fast commit ended with status = %d for tid %u", |
d9bf099c | 1162 | status, commit_tid); |
0915e464 HS |
1163 | if (status == EXT4_FC_STATUS_OK) { |
1164 | stats->fc_num_commits++; | |
1165 | stats->fc_numblks += nblks; | |
1166 | if (likely(stats->s_fc_avg_commit_time)) | |
1167 | stats->s_fc_avg_commit_time = | |
1168 | (commit_time + | |
1169 | stats->s_fc_avg_commit_time * 3) / 4; | |
1170 | else | |
1171 | stats->s_fc_avg_commit_time = commit_time; | |
1172 | } else if (status == EXT4_FC_STATUS_FAILED || | |
1173 | status == EXT4_FC_STATUS_INELIGIBLE) { | |
1174 | if (status == EXT4_FC_STATUS_FAILED) | |
1175 | stats->fc_failed_commits++; | |
1176 | stats->fc_ineligible_commits++; | |
1177 | } else { | |
1178 | stats->fc_skipped_commits++; | |
1179 | } | |
5641ace5 | 1180 | trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid); |
0915e464 HS |
1181 | } |
1182 | ||
aa75f4d3 HS |
1183 | /* |
1184 | * The main commit entry point. Performs a fast commit for transaction | |
1185 | * commit_tid if needed. If it's not possible to perform a fast commit | |
1186 | * due to various reasons, we fall back to full commit. Returns 0 | |
1187 | * on success, error otherwise. | |
1188 | */ | |
1189 | int ext4_fc_commit(journal_t *journal, tid_t commit_tid) | |
1190 | { | |
c30365b9 | 1191 | struct super_block *sb = journal->j_private; |
aa75f4d3 HS |
1192 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1193 | int nblks = 0, ret, bsize = journal->j_blocksize; | |
1194 | int subtid = atomic_read(&sbi->s_fc_subtid); | |
0915e464 | 1195 | int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; |
aa75f4d3 HS |
1196 | ktime_t start_time, commit_time; |
1197 | ||
7f142440 RH |
1198 | if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) |
1199 | return jbd2_complete_transaction(journal, commit_tid); | |
1200 | ||
5641ace5 | 1201 | trace_ext4_fc_commit_start(sb, commit_tid); |
aa75f4d3 HS |
1202 | |
1203 | start_time = ktime_get(); | |
1204 | ||
aa75f4d3 HS |
1205 | restart_fc: |
1206 | ret = jbd2_fc_begin_commit(journal, commit_tid); | |
1207 | if (ret == -EALREADY) { | |
1208 | /* There was an ongoing commit, check if we need to restart */ | |
1209 | if (atomic_read(&sbi->s_fc_subtid) <= subtid && | |
1210 | commit_tid > journal->j_commit_sequence) | |
1211 | goto restart_fc; | |
d9bf099c RH |
1212 | ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, |
1213 | commit_tid); | |
0915e464 | 1214 | return 0; |
aa75f4d3 | 1215 | } else if (ret) { |
0915e464 HS |
1216 | /* |
1217 | * Commit couldn't start. Just update stats and perform a | |
1218 | * full commit. | |
1219 | */ | |
d9bf099c RH |
1220 | ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, |
1221 | commit_tid); | |
0915e464 | 1222 | return jbd2_complete_transaction(journal, commit_tid); |
aa75f4d3 | 1223 | } |
0915e464 | 1224 | |
7bbbe241 HS |
1225 | /* |
1226 | * After establishing journal barrier via jbd2_fc_begin_commit(), check | |
1227 | * if we are fast commit ineligible. | |
1228 | */ | |
1229 | if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { | |
0915e464 HS |
1230 | status = EXT4_FC_STATUS_INELIGIBLE; |
1231 | goto fallback; | |
7bbbe241 | 1232 | } |
aa75f4d3 HS |
1233 | |
1234 | fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; | |
1235 | ret = ext4_fc_perform_commit(journal); | |
1236 | if (ret < 0) { | |
0915e464 HS |
1237 | status = EXT4_FC_STATUS_FAILED; |
1238 | goto fallback; | |
aa75f4d3 HS |
1239 | } |
1240 | nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; | |
1241 | ret = jbd2_fc_wait_bufs(journal, nblks); | |
1242 | if (ret < 0) { | |
0915e464 HS |
1243 | status = EXT4_FC_STATUS_FAILED; |
1244 | goto fallback; | |
aa75f4d3 HS |
1245 | } |
1246 | atomic_inc(&sbi->s_fc_subtid); | |
0915e464 | 1247 | ret = jbd2_fc_end_commit(journal); |
aa75f4d3 | 1248 | /* |
0915e464 HS |
1249 | * weight the commit time higher than the average time so we |
1250 | * don't react too strongly to vast changes in the commit time | |
aa75f4d3 | 1251 | */ |
0915e464 | 1252 | commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
d9bf099c | 1253 | ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); |
0915e464 HS |
1254 | return ret; |
1255 | ||
1256 | fallback: | |
1257 | ret = jbd2_fc_end_commit_fallback(journal); | |
d9bf099c | 1258 | ext4_fc_update_stats(sb, status, 0, 0, commit_tid); |
0915e464 | 1259 | return ret; |
aa75f4d3 HS |
1260 | } |
1261 | ||
ff780b91 HS |
1262 | /* |
1263 | * Fast commit cleanup routine. This is called after every fast commit and | |
1264 | * full commit. full is true if we are called after a full commit. | |
1265 | */ | |
e85c81ba | 1266 | static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) |
ff780b91 | 1267 | { |
aa75f4d3 HS |
1268 | struct super_block *sb = journal->j_private; |
1269 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
96e7c02d | 1270 | struct ext4_inode_info *iter, *iter_n; |
aa75f4d3 | 1271 | struct ext4_fc_dentry_update *fc_dentry; |
aa75f4d3 HS |
1272 | |
1273 | if (full && sbi->s_fc_bh) | |
1274 | sbi->s_fc_bh = NULL; | |
1275 | ||
08f4c42a | 1276 | trace_ext4_fc_cleanup(journal, full, tid); |
aa75f4d3 HS |
1277 | jbd2_fc_release_bufs(journal); |
1278 | ||
1279 | spin_lock(&sbi->s_fc_lock); | |
96e7c02d DP |
1280 | list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], |
1281 | i_fc_list) { | |
aa75f4d3 HS |
1282 | list_del_init(&iter->i_fc_list); |
1283 | ext4_clear_inode_state(&iter->vfs_inode, | |
1284 | EXT4_STATE_FC_COMMITTING); | |
bdc8a53a XY |
1285 | if (iter->i_sync_tid <= tid) |
1286 | ext4_fc_reset_inode(&iter->vfs_inode); | |
aa75f4d3 HS |
1287 | /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ |
1288 | smp_mb(); | |
1289 | #if (BITS_PER_LONG < 64) | |
1290 | wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); | |
1291 | #else | |
1292 | wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); | |
1293 | #endif | |
1294 | } | |
1295 | ||
1296 | while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { | |
1297 | fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], | |
1298 | struct ext4_fc_dentry_update, | |
1299 | fcd_list); | |
1300 | list_del_init(&fc_dentry->fcd_list); | |
b3998b3b | 1301 | list_del_init(&fc_dentry->fcd_dilist); |
aa75f4d3 HS |
1302 | spin_unlock(&sbi->s_fc_lock); |
1303 | ||
1304 | if (fc_dentry->fcd_name.name && | |
1305 | fc_dentry->fcd_name.len > DNAME_INLINE_LEN) | |
1306 | kfree(fc_dentry->fcd_name.name); | |
1307 | kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); | |
1308 | spin_lock(&sbi->s_fc_lock); | |
1309 | } | |
1310 | ||
1311 | list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], | |
1312 | &sbi->s_fc_dentry_q[FC_Q_MAIN]); | |
1313 | list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], | |
31e203e0 | 1314 | &sbi->s_fc_q[FC_Q_MAIN]); |
aa75f4d3 | 1315 | |
e85c81ba XY |
1316 | if (tid >= sbi->s_fc_ineligible_tid) { |
1317 | sbi->s_fc_ineligible_tid = 0; | |
1318 | ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); | |
1319 | } | |
aa75f4d3 HS |
1320 | |
1321 | if (full) | |
1322 | sbi->s_fc_bytes = 0; | |
1323 | spin_unlock(&sbi->s_fc_lock); | |
1324 | trace_ext4_fc_stats(sb); | |
ff780b91 | 1325 | } |
6866d7b3 | 1326 | |
8016e29f HS |
1327 | /* Ext4 Replay Path Routines */ |
1328 | ||
8016e29f HS |
/*
 * Decoded form of a dentry tlv payload, shared by the dentry replay
 * routines. Filled in by tl_to_darg().
 */
struct dentry_info_args {
	int parent_ino;		/* inode number of the parent directory */
	int dname_len;		/* length of the name at @dname, in bytes */
	int ino;		/* inode number the entry refers to */
	int inode_len;
	char *dname;		/* entry name, pointing into the tlv value */
};
1334 | ||
11768cfd EB |
1335 | /* Same as struct ext4_fc_tl, but uses native endianness fields */ |
1336 | struct ext4_fc_tl_mem { | |
1337 | u16 fc_tag; | |
1338 | u16 fc_len; | |
1339 | }; | |
1340 | ||
8016e29f | 1341 | static inline void tl_to_darg(struct dentry_info_args *darg, |
11768cfd | 1342 | struct ext4_fc_tl_mem *tl, u8 *val) |
8016e29f | 1343 | { |
a7ba36bc | 1344 | struct ext4_fc_dentry_info fcd; |
8016e29f | 1345 | |
a7ba36bc | 1346 | memcpy(&fcd, val, sizeof(fcd)); |
8016e29f | 1347 | |
a7ba36bc HS |
1348 | darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); |
1349 | darg->ino = le32_to_cpu(fcd.fc_ino); | |
1350 | darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); | |
dcc58274 YB |
1351 | darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); |
1352 | } | |
1353 | ||
11768cfd | 1354 | static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val) |
dcc58274 | 1355 | { |
11768cfd EB |
1356 | struct ext4_fc_tl tl_disk; |
1357 | ||
1358 | memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN); | |
1359 | tl->fc_len = le16_to_cpu(tl_disk.fc_len); | |
1360 | tl->fc_tag = le16_to_cpu(tl_disk.fc_tag); | |
8016e29f HS |
1361 | } |
1362 | ||
1363 | /* Unlink replay function */ | |
11768cfd EB |
1364 | static int ext4_fc_replay_unlink(struct super_block *sb, |
1365 | struct ext4_fc_tl_mem *tl, u8 *val) | |
8016e29f HS |
1366 | { |
1367 | struct inode *inode, *old_parent; | |
1368 | struct qstr entry; | |
1369 | struct dentry_info_args darg; | |
1370 | int ret = 0; | |
1371 | ||
a7ba36bc | 1372 | tl_to_darg(&darg, tl, val); |
8016e29f HS |
1373 | |
1374 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, | |
1375 | darg.parent_ino, darg.dname_len); | |
1376 | ||
1377 | entry.name = darg.dname; | |
1378 | entry.len = darg.dname_len; | |
1379 | inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); | |
1380 | ||
23dd561a | 1381 | if (IS_ERR(inode)) { |
4978c659 | 1382 | ext4_debug("Inode %d not found", darg.ino); |
8016e29f HS |
1383 | return 0; |
1384 | } | |
1385 | ||
1386 | old_parent = ext4_iget(sb, darg.parent_ino, | |
1387 | EXT4_IGET_NORMAL); | |
23dd561a | 1388 | if (IS_ERR(old_parent)) { |
4978c659 | 1389 | ext4_debug("Dir with inode %d not found", darg.parent_ino); |
8016e29f HS |
1390 | iput(inode); |
1391 | return 0; | |
1392 | } | |
1393 | ||
4c0d5778 | 1394 | ret = __ext4_unlink(old_parent, &entry, inode, NULL); |
8016e29f HS |
1395 | /* -ENOENT ok coz it might not exist anymore. */ |
1396 | if (ret == -ENOENT) | |
1397 | ret = 0; | |
1398 | iput(old_parent); | |
1399 | iput(inode); | |
1400 | return ret; | |
1401 | } | |
1402 | ||
1403 | static int ext4_fc_replay_link_internal(struct super_block *sb, | |
1404 | struct dentry_info_args *darg, | |
1405 | struct inode *inode) | |
1406 | { | |
1407 | struct inode *dir = NULL; | |
1408 | struct dentry *dentry_dir = NULL, *dentry_inode = NULL; | |
1409 | struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); | |
1410 | int ret = 0; | |
1411 | ||
1412 | dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); | |
1413 | if (IS_ERR(dir)) { | |
4978c659 | 1414 | ext4_debug("Dir with inode %d not found.", darg->parent_ino); |
8016e29f HS |
1415 | dir = NULL; |
1416 | goto out; | |
1417 | } | |
1418 | ||
1419 | dentry_dir = d_obtain_alias(dir); | |
1420 | if (IS_ERR(dentry_dir)) { | |
4978c659 | 1421 | ext4_debug("Failed to obtain dentry"); |
8016e29f HS |
1422 | dentry_dir = NULL; |
1423 | goto out; | |
1424 | } | |
1425 | ||
1426 | dentry_inode = d_alloc(dentry_dir, &qstr_dname); | |
1427 | if (!dentry_inode) { | |
4978c659 | 1428 | ext4_debug("Inode dentry not created."); |
8016e29f HS |
1429 | ret = -ENOMEM; |
1430 | goto out; | |
1431 | } | |
1432 | ||
1433 | ret = __ext4_link(dir, inode, dentry_inode); | |
1434 | /* | |
1435 | * It's possible that link already existed since data blocks | |
1436 | * for the dir in question got persisted before we crashed OR | |
1437 | * we replayed this tag and crashed before the entire replay | |
1438 | * could complete. | |
1439 | */ | |
1440 | if (ret && ret != -EEXIST) { | |
4978c659 | 1441 | ext4_debug("Failed to link\n"); |
8016e29f HS |
1442 | goto out; |
1443 | } | |
1444 | ||
1445 | ret = 0; | |
1446 | out: | |
1447 | if (dentry_dir) { | |
1448 | d_drop(dentry_dir); | |
1449 | dput(dentry_dir); | |
1450 | } else if (dir) { | |
1451 | iput(dir); | |
1452 | } | |
1453 | if (dentry_inode) { | |
1454 | d_drop(dentry_inode); | |
1455 | dput(dentry_inode); | |
1456 | } | |
1457 | ||
1458 | return ret; | |
1459 | } | |
1460 | ||
1461 | /* Link replay function */ | |
11768cfd EB |
1462 | static int ext4_fc_replay_link(struct super_block *sb, |
1463 | struct ext4_fc_tl_mem *tl, u8 *val) | |
8016e29f HS |
1464 | { |
1465 | struct inode *inode; | |
1466 | struct dentry_info_args darg; | |
1467 | int ret = 0; | |
1468 | ||
a7ba36bc | 1469 | tl_to_darg(&darg, tl, val); |
8016e29f HS |
1470 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, |
1471 | darg.parent_ino, darg.dname_len); | |
1472 | ||
1473 | inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); | |
23dd561a | 1474 | if (IS_ERR(inode)) { |
4978c659 | 1475 | ext4_debug("Inode not found."); |
8016e29f HS |
1476 | return 0; |
1477 | } | |
1478 | ||
1479 | ret = ext4_fc_replay_link_internal(sb, &darg, inode); | |
1480 | iput(inode); | |
1481 | return ret; | |
1482 | } | |
1483 | ||
1484 | /* | |
1485 | * Record all the modified inodes during replay. We use this later to setup | |
1486 | * block bitmaps correctly. | |
1487 | */ | |
1488 | static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) | |
1489 | { | |
1490 | struct ext4_fc_replay_state *state; | |
1491 | int i; | |
1492 | ||
1493 | state = &EXT4_SB(sb)->s_fc_replay_state; | |
1494 | for (i = 0; i < state->fc_modified_inodes_used; i++) | |
1495 | if (state->fc_modified_inodes[i] == ino) | |
1496 | return 0; | |
1497 | if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { | |
9305721a YB |
1498 | int *fc_modified_inodes; |
1499 | ||
1500 | fc_modified_inodes = krealloc(state->fc_modified_inodes, | |
cdce59a1 RH |
1501 | sizeof(int) * (state->fc_modified_inodes_size + |
1502 | EXT4_FC_REPLAY_REALLOC_INCREMENT), | |
1503 | GFP_KERNEL); | |
9305721a | 1504 | if (!fc_modified_inodes) |
8016e29f | 1505 | return -ENOMEM; |
9305721a | 1506 | state->fc_modified_inodes = fc_modified_inodes; |
cdce59a1 RH |
1507 | state->fc_modified_inodes_size += |
1508 | EXT4_FC_REPLAY_REALLOC_INCREMENT; | |
8016e29f HS |
1509 | } |
1510 | state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; | |
1511 | return 0; | |
1512 | } | |
1513 | ||
1514 | /* | |
1515 | * Inode replay function | |
1516 | */ | |
11768cfd EB |
1517 | static int ext4_fc_replay_inode(struct super_block *sb, |
1518 | struct ext4_fc_tl_mem *tl, u8 *val) | |
8016e29f | 1519 | { |
a7ba36bc | 1520 | struct ext4_fc_inode fc_inode; |
8016e29f HS |
1521 | struct ext4_inode *raw_inode; |
1522 | struct ext4_inode *raw_fc_inode; | |
1523 | struct inode *inode = NULL; | |
1524 | struct ext4_iloc iloc; | |
dcc58274 | 1525 | int inode_len, ino, ret, tag = tl->fc_tag; |
8016e29f | 1526 | struct ext4_extent_header *eh; |
0d043351 | 1527 | size_t off_gen = offsetof(struct ext4_inode, i_generation); |
8016e29f | 1528 | |
a7ba36bc | 1529 | memcpy(&fc_inode, val, sizeof(fc_inode)); |
8016e29f | 1530 | |
a7ba36bc | 1531 | ino = le32_to_cpu(fc_inode.fc_ino); |
8016e29f HS |
1532 | trace_ext4_fc_replay(sb, tag, ino, 0, 0); |
1533 | ||
1534 | inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); | |
23dd561a | 1535 | if (!IS_ERR(inode)) { |
8016e29f HS |
1536 | ext4_ext_clear_bb(inode); |
1537 | iput(inode); | |
1538 | } | |
23dd561a | 1539 | inode = NULL; |
8016e29f | 1540 | |
cdce59a1 RH |
1541 | ret = ext4_fc_record_modified_inode(sb, ino); |
1542 | if (ret) | |
1543 | goto out; | |
8016e29f | 1544 | |
a7ba36bc HS |
1545 | raw_fc_inode = (struct ext4_inode *) |
1546 | (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); | |
8016e29f HS |
1547 | ret = ext4_get_fc_inode_loc(sb, ino, &iloc); |
1548 | if (ret) | |
1549 | goto out; | |
1550 | ||
dcc58274 | 1551 | inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); |
8016e29f HS |
1552 | raw_inode = ext4_raw_inode(&iloc); |
1553 | ||
1554 | memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); | |
0d043351 TT |
1555 | memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, |
1556 | inode_len - off_gen); | |
8016e29f HS |
1557 | if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { |
1558 | eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); | |
1559 | if (eh->eh_magic != EXT4_EXT_MAGIC) { | |
1560 | memset(eh, 0, sizeof(*eh)); | |
1561 | eh->eh_magic = EXT4_EXT_MAGIC; | |
1562 | eh->eh_max = cpu_to_le16( | |
1563 | (sizeof(raw_inode->i_block) - | |
1564 | sizeof(struct ext4_extent_header)) | |
1565 | / sizeof(struct ext4_extent)); | |
1566 | } | |
1567 | } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { | |
1568 | memcpy(raw_inode->i_block, raw_fc_inode->i_block, | |
1569 | sizeof(raw_inode->i_block)); | |
1570 | } | |
1571 | ||
1572 | /* Immediately update the inode on disk. */ | |
1573 | ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); | |
1574 | if (ret) | |
1575 | goto out; | |
1576 | ret = sync_dirty_buffer(iloc.bh); | |
1577 | if (ret) | |
1578 | goto out; | |
1579 | ret = ext4_mark_inode_used(sb, ino); | |
1580 | if (ret) | |
1581 | goto out; | |
1582 | ||
1583 | /* Given that we just wrote the inode on disk, this SHOULD succeed. */ | |
1584 | inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); | |
23dd561a | 1585 | if (IS_ERR(inode)) { |
4978c659 | 1586 | ext4_debug("Inode not found."); |
8016e29f HS |
1587 | return -EFSCORRUPTED; |
1588 | } | |
1589 | ||
1590 | /* | |
1591 | * Our allocator could have made different decisions than before | |
1592 | * crashing. This should be fixed but until then, we calculate | |
1593 | * the number of blocks the inode. | |
1594 | */ | |
1ebf2178 HS |
1595 | if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) |
1596 | ext4_ext_replay_set_iblocks(inode); | |
8016e29f HS |
1597 | |
1598 | inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); | |
1599 | ext4_reset_inode_seed(inode); | |
1600 | ||
1601 | ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); | |
1602 | ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); | |
1603 | sync_dirty_buffer(iloc.bh); | |
1604 | brelse(iloc.bh); | |
1605 | out: | |
1606 | iput(inode); | |
1607 | if (!ret) | |
c6bf3f0e | 1608 | blkdev_issue_flush(sb->s_bdev); |
8016e29f HS |
1609 | |
1610 | return 0; | |
1611 | } | |
1612 | ||
1613 | /* | |
1614 | * Dentry create replay function. | |
1615 | * | |
1616 | * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the | |
1617 | * inode for which we are trying to create a dentry here, should already have | |
1618 | * been replayed before we start here. | |
1619 | */ | |
11768cfd EB |
1620 | static int ext4_fc_replay_create(struct super_block *sb, |
1621 | struct ext4_fc_tl_mem *tl, u8 *val) | |
8016e29f HS |
1622 | { |
1623 | int ret = 0; | |
1624 | struct inode *inode = NULL; | |
1625 | struct inode *dir = NULL; | |
1626 | struct dentry_info_args darg; | |
1627 | ||
a7ba36bc | 1628 | tl_to_darg(&darg, tl, val); |
8016e29f HS |
1629 | |
1630 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, | |
1631 | darg.parent_ino, darg.dname_len); | |
1632 | ||
1633 | /* This takes care of update group descriptor and other metadata */ | |
1634 | ret = ext4_mark_inode_used(sb, darg.ino); | |
1635 | if (ret) | |
1636 | goto out; | |
1637 | ||
1638 | inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); | |
23dd561a | 1639 | if (IS_ERR(inode)) { |
4978c659 | 1640 | ext4_debug("inode %d not found.", darg.ino); |
8016e29f HS |
1641 | inode = NULL; |
1642 | ret = -EINVAL; | |
1643 | goto out; | |
1644 | } | |
1645 | ||
1646 | if (S_ISDIR(inode->i_mode)) { | |
1647 | /* | |
1648 | * If we are creating a directory, we need to make sure that the | |
1649 | * dot and dot dot dirents are setup properly. | |
1650 | */ | |
1651 | dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); | |
23dd561a | 1652 | if (IS_ERR(dir)) { |
4978c659 | 1653 | ext4_debug("Dir %d not found.", darg.ino); |
8016e29f HS |
1654 | goto out; |
1655 | } | |
1656 | ret = ext4_init_new_dir(NULL, dir, inode); | |
1657 | iput(dir); | |
1658 | if (ret) { | |
1659 | ret = 0; | |
1660 | goto out; | |
1661 | } | |
1662 | } | |
1663 | ret = ext4_fc_replay_link_internal(sb, &darg, inode); | |
1664 | if (ret) | |
1665 | goto out; | |
1666 | set_nlink(inode, 1); | |
1667 | ext4_mark_inode_dirty(NULL, inode); | |
1668 | out: | |
784a0995 | 1669 | iput(inode); |
8016e29f HS |
1670 | return ret; |
1671 | } | |
1672 | ||
1673 | /* | |
599ea31d XY |
1674 | * Record physical disk regions which are in use as per fast commit area, |
1675 | * and used by inodes during replay phase. Our simple replay phase | |
1676 | * allocator excludes these regions from allocation. | |
8016e29f | 1677 | */ |
599ea31d XY |
1678 | int ext4_fc_record_regions(struct super_block *sb, int ino, |
1679 | ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) | |
8016e29f HS |
1680 | { |
1681 | struct ext4_fc_replay_state *state; | |
1682 | struct ext4_fc_alloc_region *region; | |
1683 | ||
1684 | state = &EXT4_SB(sb)->s_fc_replay_state; | |
599ea31d XY |
1685 | /* |
1686 | * during replay phase, the fc_regions_valid may not same as | |
1687 | * fc_regions_used, update it when do new additions. | |
1688 | */ | |
1689 | if (replay && state->fc_regions_used != state->fc_regions_valid) | |
1690 | state->fc_regions_used = state->fc_regions_valid; | |
8016e29f | 1691 | if (state->fc_regions_used == state->fc_regions_size) { |
7069d105 YB |
1692 | struct ext4_fc_alloc_region *fc_regions; |
1693 | ||
7069d105 | 1694 | fc_regions = krealloc(state->fc_regions, |
27cd4978 YB |
1695 | sizeof(struct ext4_fc_alloc_region) * |
1696 | (state->fc_regions_size + | |
1697 | EXT4_FC_REPLAY_REALLOC_INCREMENT), | |
7069d105 YB |
1698 | GFP_KERNEL); |
1699 | if (!fc_regions) | |
8016e29f | 1700 | return -ENOMEM; |
27cd4978 YB |
1701 | state->fc_regions_size += |
1702 | EXT4_FC_REPLAY_REALLOC_INCREMENT; | |
7069d105 | 1703 | state->fc_regions = fc_regions; |
8016e29f HS |
1704 | } |
1705 | region = &state->fc_regions[state->fc_regions_used++]; | |
1706 | region->ino = ino; | |
1707 | region->lblk = lblk; | |
1708 | region->pblk = pblk; | |
1709 | region->len = len; | |
1710 | ||
599ea31d XY |
1711 | if (replay) |
1712 | state->fc_regions_valid++; | |
1713 | ||
8016e29f HS |
1714 | return 0; |
1715 | } | |
1716 | ||
1717 | /* Replay add range tag */ | |
1718 | static int ext4_fc_replay_add_range(struct super_block *sb, | |
11768cfd | 1719 | struct ext4_fc_tl_mem *tl, u8 *val) |
8016e29f | 1720 | { |
a7ba36bc | 1721 | struct ext4_fc_add_range fc_add_ex; |
8016e29f HS |
1722 | struct ext4_extent newex, *ex; |
1723 | struct inode *inode; | |
1724 | ext4_lblk_t start, cur; | |
1725 | int remaining, len; | |
1726 | ext4_fsblk_t start_pblk; | |
1727 | struct ext4_map_blocks map; | |
1728 | struct ext4_ext_path *path = NULL; | |
1729 | int ret; | |
1730 | ||
a7ba36bc HS |
1731 | memcpy(&fc_add_ex, val, sizeof(fc_add_ex)); |
1732 | ex = (struct ext4_extent *)&fc_add_ex.fc_ex; | |
8016e29f HS |
1733 | |
1734 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE, | |
a7ba36bc | 1735 | le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block), |
8016e29f HS |
1736 | ext4_ext_get_actual_len(ex)); |
1737 | ||
a7ba36bc | 1738 | inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); |
23dd561a | 1739 | if (IS_ERR(inode)) { |
4978c659 | 1740 | ext4_debug("Inode not found."); |
8016e29f HS |
1741 | return 0; |
1742 | } | |
1743 | ||
1744 | ret = ext4_fc_record_modified_inode(sb, inode->i_ino); | |
cdce59a1 RH |
1745 | if (ret) |
1746 | goto out; | |
8016e29f HS |
1747 | |
1748 | start = le32_to_cpu(ex->ee_block); | |
1749 | start_pblk = ext4_ext_pblock(ex); | |
1750 | len = ext4_ext_get_actual_len(ex); | |
1751 | ||
1752 | cur = start; | |
1753 | remaining = len; | |
4978c659 | 1754 | ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", |
8016e29f HS |
1755 | start, start_pblk, len, ext4_ext_is_unwritten(ex), |
1756 | inode->i_ino); | |
1757 | ||
1758 | while (remaining > 0) { | |
1759 | map.m_lblk = cur; | |
1760 | map.m_len = remaining; | |
1761 | map.m_pblk = 0; | |
1762 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
1763 | ||
cdce59a1 RH |
1764 | if (ret < 0) |
1765 | goto out; | |
8016e29f HS |
1766 | |
1767 | if (ret == 0) { | |
1768 | /* Range is not mapped */ | |
1769 | path = ext4_find_extent(inode, cur, NULL, 0); | |
cdce59a1 RH |
1770 | if (IS_ERR(path)) |
1771 | goto out; | |
8016e29f HS |
1772 | memset(&newex, 0, sizeof(newex)); |
1773 | newex.ee_block = cpu_to_le32(cur); | |
1774 | ext4_ext_store_pblock( | |
1775 | &newex, start_pblk + cur - start); | |
1776 | newex.ee_len = cpu_to_le16(map.m_len); | |
1777 | if (ext4_ext_is_unwritten(ex)) | |
1778 | ext4_ext_mark_unwritten(&newex); | |
1779 | down_write(&EXT4_I(inode)->i_data_sem); | |
1780 | ret = ext4_ext_insert_extent( | |
1781 | NULL, inode, &path, &newex, 0); | |
1782 | up_write((&EXT4_I(inode)->i_data_sem)); | |
7ff5fdda | 1783 | ext4_free_ext_path(path); |
cdce59a1 RH |
1784 | if (ret) |
1785 | goto out; | |
8016e29f HS |
1786 | goto next; |
1787 | } | |
1788 | ||
1789 | if (start_pblk + cur - start != map.m_pblk) { | |
1790 | /* | |
1791 | * Logical to physical mapping changed. This can happen | |
1792 | * if this range was removed and then reallocated to | |
1793 | * map to new physical blocks during a fast commit. | |
1794 | */ | |
1795 | ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, | |
1796 | ext4_ext_is_unwritten(ex), | |
1797 | start_pblk + cur - start); | |
cdce59a1 RH |
1798 | if (ret) |
1799 | goto out; | |
8016e29f HS |
1800 | /* |
1801 | * Mark the old blocks as free since they aren't used | |
1802 | * anymore. We maintain an array of all the modified | |
1803 | * inodes. In case these blocks are still used at either | |
1804 | * a different logical range in the same inode or in | |
1805 | * some different inode, we will mark them as allocated | |
1806 | * at the end of the FC replay using our array of | |
1807 | * modified inodes. | |
1808 | */ | |
1809 | ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); | |
1810 | goto next; | |
1811 | } | |
1812 | ||
1813 | /* Range is mapped and needs a state change */ | |
4978c659 | 1814 | ext4_debug("Converting from %ld to %d %lld", |
8016e29f HS |
1815 | map.m_flags & EXT4_MAP_UNWRITTEN, |
1816 | ext4_ext_is_unwritten(ex), map.m_pblk); | |
1817 | ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, | |
1818 | ext4_ext_is_unwritten(ex), map.m_pblk); | |
cdce59a1 RH |
1819 | if (ret) |
1820 | goto out; | |
8016e29f HS |
1821 | /* |
1822 | * We may have split the extent tree while toggling the state. | |
1823 | * Try to shrink the extent tree now. | |
1824 | */ | |
1825 | ext4_ext_replay_shrink_inode(inode, start + len); | |
1826 | next: | |
1827 | cur += map.m_len; | |
1828 | remaining -= map.m_len; | |
1829 | } | |
1830 | ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> | |
1831 | sb->s_blocksize_bits); | |
cdce59a1 | 1832 | out: |
8016e29f HS |
1833 | iput(inode); |
1834 | return 0; | |
1835 | } | |
1836 | ||
1837 | /* Replay DEL_RANGE tag */ | |
1838 | static int | |
11768cfd EB |
1839 | ext4_fc_replay_del_range(struct super_block *sb, |
1840 | struct ext4_fc_tl_mem *tl, u8 *val) | |
8016e29f HS |
1841 | { |
1842 | struct inode *inode; | |
a7ba36bc | 1843 | struct ext4_fc_del_range lrange; |
8016e29f HS |
1844 | struct ext4_map_blocks map; |
1845 | ext4_lblk_t cur, remaining; | |
1846 | int ret; | |
1847 | ||
a7ba36bc HS |
1848 | memcpy(&lrange, val, sizeof(lrange)); |
1849 | cur = le32_to_cpu(lrange.fc_lblk); | |
1850 | remaining = le32_to_cpu(lrange.fc_len); | |
8016e29f HS |
1851 | |
1852 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE, | |
a7ba36bc | 1853 | le32_to_cpu(lrange.fc_ino), cur, remaining); |
8016e29f | 1854 | |
a7ba36bc | 1855 | inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); |
23dd561a | 1856 | if (IS_ERR(inode)) { |
4978c659 | 1857 | ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino)); |
8016e29f HS |
1858 | return 0; |
1859 | } | |
1860 | ||
1861 | ret = ext4_fc_record_modified_inode(sb, inode->i_ino); | |
cdce59a1 RH |
1862 | if (ret) |
1863 | goto out; | |
8016e29f | 1864 | |
4978c659 | 1865 | ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n", |
a7ba36bc HS |
1866 | inode->i_ino, le32_to_cpu(lrange.fc_lblk), |
1867 | le32_to_cpu(lrange.fc_len)); | |
8016e29f HS |
1868 | while (remaining > 0) { |
1869 | map.m_lblk = cur; | |
1870 | map.m_len = remaining; | |
1871 | ||
1872 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
cdce59a1 RH |
1873 | if (ret < 0) |
1874 | goto out; | |
8016e29f HS |
1875 | if (ret > 0) { |
1876 | remaining -= ret; | |
1877 | cur += ret; | |
1878 | ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); | |
1879 | } else { | |
1880 | remaining -= map.m_len; | |
1881 | cur += map.m_len; | |
1882 | } | |
1883 | } | |
1884 | ||
0b5b5a62 | 1885 | down_write(&EXT4_I(inode)->i_data_sem); |
8fca8a2b XY |
1886 | ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), |
1887 | le32_to_cpu(lrange.fc_lblk) + | |
1888 | le32_to_cpu(lrange.fc_len) - 1); | |
0b5b5a62 | 1889 | up_write(&EXT4_I(inode)->i_data_sem); |
cdce59a1 RH |
1890 | if (ret) |
1891 | goto out; | |
8016e29f HS |
1892 | ext4_ext_replay_shrink_inode(inode, |
1893 | i_size_read(inode) >> sb->s_blocksize_bits); | |
1894 | ext4_mark_inode_dirty(NULL, inode); | |
cdce59a1 | 1895 | out: |
8016e29f | 1896 | iput(inode); |
8016e29f HS |
1897 | return 0; |
1898 | } | |
1899 | ||
8016e29f HS |
1900 | static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) |
1901 | { | |
1902 | struct ext4_fc_replay_state *state; | |
1903 | struct inode *inode; | |
1904 | struct ext4_ext_path *path = NULL; | |
1905 | struct ext4_map_blocks map; | |
1906 | int i, ret, j; | |
1907 | ext4_lblk_t cur, end; | |
1908 | ||
1909 | state = &EXT4_SB(sb)->s_fc_replay_state; | |
1910 | for (i = 0; i < state->fc_modified_inodes_used; i++) { | |
1911 | inode = ext4_iget(sb, state->fc_modified_inodes[i], | |
1912 | EXT4_IGET_NORMAL); | |
23dd561a | 1913 | if (IS_ERR(inode)) { |
4978c659 | 1914 | ext4_debug("Inode %d not found.", |
8016e29f HS |
1915 | state->fc_modified_inodes[i]); |
1916 | continue; | |
1917 | } | |
1918 | cur = 0; | |
1919 | end = EXT_MAX_BLOCKS; | |
1ebf2178 HS |
1920 | if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { |
1921 | iput(inode); | |
1922 | continue; | |
1923 | } | |
8016e29f HS |
1924 | while (cur < end) { |
1925 | map.m_lblk = cur; | |
1926 | map.m_len = end - cur; | |
1927 | ||
1928 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
1929 | if (ret < 0) | |
1930 | break; | |
1931 | ||
1932 | if (ret > 0) { | |
1933 | path = ext4_find_extent(inode, map.m_lblk, NULL, 0); | |
23dd561a | 1934 | if (!IS_ERR(path)) { |
8016e29f HS |
1935 | for (j = 0; j < path->p_depth; j++) |
1936 | ext4_mb_mark_bb(inode->i_sb, | |
1937 | path[j].p_block, 1, 1); | |
7ff5fdda | 1938 | ext4_free_ext_path(path); |
8016e29f HS |
1939 | } |
1940 | cur += ret; | |
1941 | ext4_mb_mark_bb(inode->i_sb, map.m_pblk, | |
1942 | map.m_len, 1); | |
1943 | } else { | |
1944 | cur = cur + (map.m_len ? map.m_len : 1); | |
1945 | } | |
1946 | } | |
1947 | iput(inode); | |
1948 | } | |
1949 | } | |
1950 | ||
1951 | /* | |
1952 | * Check if block is in excluded regions for block allocation. The simple | |
1953 | * allocator that runs during replay phase is calls this function to see | |
1954 | * if it is okay to use a block. | |
1955 | */ | |
1956 | bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) | |
1957 | { | |
1958 | int i; | |
1959 | struct ext4_fc_replay_state *state; | |
1960 | ||
1961 | state = &EXT4_SB(sb)->s_fc_replay_state; | |
1962 | for (i = 0; i < state->fc_regions_valid; i++) { | |
1963 | if (state->fc_regions[i].ino == 0 || | |
1964 | state->fc_regions[i].len == 0) | |
1965 | continue; | |
dbaafbad RH |
1966 | if (in_range(blk, state->fc_regions[i].pblk, |
1967 | state->fc_regions[i].len)) | |
8016e29f HS |
1968 | return true; |
1969 | } | |
1970 | return false; | |
1971 | } | |
1972 | ||
1973 | /* Cleanup function called after replay */ | |
1974 | void ext4_fc_replay_cleanup(struct super_block *sb) | |
1975 | { | |
1976 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
1977 | ||
1978 | sbi->s_mount_state &= ~EXT4_FC_REPLAY; | |
1979 | kfree(sbi->s_fc_replay_state.fc_regions); | |
1980 | kfree(sbi->s_fc_replay_state.fc_modified_inodes); | |
1981 | } | |
1982 | ||
64b4a25c EB |
1983 | static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi, |
1984 | int tag, int len) | |
1b45cc5c | 1985 | { |
64b4a25c | 1986 | switch (tag) { |
1b45cc5c | 1987 | case EXT4_FC_TAG_ADD_RANGE: |
64b4a25c | 1988 | return len == sizeof(struct ext4_fc_add_range); |
1b45cc5c | 1989 | case EXT4_FC_TAG_DEL_RANGE: |
64b4a25c EB |
1990 | return len == sizeof(struct ext4_fc_del_range); |
1991 | case EXT4_FC_TAG_CREAT: | |
1b45cc5c YB |
1992 | case EXT4_FC_TAG_LINK: |
1993 | case EXT4_FC_TAG_UNLINK: | |
64b4a25c EB |
1994 | len -= sizeof(struct ext4_fc_dentry_info); |
1995 | return len >= 1 && len <= EXT4_NAME_LEN; | |
1b45cc5c | 1996 | case EXT4_FC_TAG_INODE: |
64b4a25c EB |
1997 | len -= sizeof(struct ext4_fc_inode); |
1998 | return len >= EXT4_GOOD_OLD_INODE_SIZE && | |
1999 | len <= sbi->s_inode_size; | |
1b45cc5c | 2000 | case EXT4_FC_TAG_PAD: |
64b4a25c EB |
2001 | return true; /* padding can have any length */ |
2002 | case EXT4_FC_TAG_TAIL: | |
2003 | return len >= sizeof(struct ext4_fc_tail); | |
2004 | case EXT4_FC_TAG_HEAD: | |
2005 | return len == sizeof(struct ext4_fc_head); | |
1b45cc5c | 2006 | } |
64b4a25c | 2007 | return false; |
1b45cc5c YB |
2008 | } |
2009 | ||
8016e29f HS |
2010 | /* |
2011 | * Recovery Scan phase handler | |
2012 | * | |
2013 | * This function is called during the scan phase and is responsible | |
2014 | * for doing following things: | |
2015 | * - Make sure the fast commit area has valid tags for replay | |
2016 | * - Count number of tags that need to be replayed by the replay handler | |
2017 | * - Verify CRC | |
2018 | * - Create a list of excluded blocks for allocation during replay phase | |
2019 | * | |
2020 | * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is | |
2021 | * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP | |
2022 | * to indicate that scan has finished and JBD2 can now start replay phase. | |
2023 | * It returns a negative error to indicate that there was an error. At the end | |
2024 | * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set | |
2025 | * to indicate the number of tags that need to replayed during the replay phase. | |
2026 | */ | |
2027 | static int ext4_fc_replay_scan(journal_t *journal, | |
2028 | struct buffer_head *bh, int off, | |
2029 | tid_t expected_tid) | |
2030 | { | |
2031 | struct super_block *sb = journal->j_private; | |
2032 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
2033 | struct ext4_fc_replay_state *state; | |
2034 | int ret = JBD2_FC_REPLAY_CONTINUE; | |
a7ba36bc | 2035 | struct ext4_fc_add_range ext; |
11768cfd | 2036 | struct ext4_fc_tl_mem tl; |
a7ba36bc HS |
2037 | struct ext4_fc_tail tail; |
2038 | __u8 *start, *end, *cur, *val; | |
2039 | struct ext4_fc_head head; | |
8016e29f HS |
2040 | struct ext4_extent *ex; |
2041 | ||
2042 | state = &sbi->s_fc_replay_state; | |
2043 | ||
2044 | start = (u8 *)bh->b_data; | |
48a6a66d | 2045 | end = start + journal->j_blocksize; |
8016e29f HS |
2046 | |
2047 | if (state->fc_replay_expected_off == 0) { | |
2048 | state->fc_cur_tag = 0; | |
2049 | state->fc_replay_num_tags = 0; | |
2050 | state->fc_crc = 0; | |
2051 | state->fc_regions = NULL; | |
2052 | state->fc_regions_valid = state->fc_regions_used = | |
2053 | state->fc_regions_size = 0; | |
2054 | /* Check if we can stop early */ | |
2055 | if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) | |
2056 | != EXT4_FC_TAG_HEAD) | |
2057 | return 0; | |
2058 | } | |
2059 | ||
2060 | if (off != state->fc_replay_expected_off) { | |
2061 | ret = -EFSCORRUPTED; | |
2062 | goto out_err; | |
2063 | } | |
2064 | ||
2065 | state->fc_replay_expected_off++; | |
48a6a66d | 2066 | for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; |
dcc58274 YB |
2067 | cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { |
2068 | ext4_fc_get_tl(&tl, cur); | |
fdc2a3c7 | 2069 | val = cur + EXT4_FC_TAG_BASE_LEN; |
64b4a25c EB |
2070 | if (tl.fc_len > end - val || |
2071 | !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) { | |
1b45cc5c YB |
2072 | ret = state->fc_replay_num_tags ? |
2073 | JBD2_FC_REPLAY_STOP : -ECANCELED; | |
2074 | goto out_err; | |
2075 | } | |
4978c659 | 2076 | ext4_debug("Scan phase, tag:%s, blk %lld\n", |
dcc58274 YB |
2077 | tag2str(tl.fc_tag), bh->b_blocknr); |
2078 | switch (tl.fc_tag) { | |
8016e29f | 2079 | case EXT4_FC_TAG_ADD_RANGE: |
a7ba36bc HS |
2080 | memcpy(&ext, val, sizeof(ext)); |
2081 | ex = (struct ext4_extent *)&ext.fc_ex; | |
8016e29f | 2082 | ret = ext4_fc_record_regions(sb, |
a7ba36bc | 2083 | le32_to_cpu(ext.fc_ino), |
8016e29f | 2084 | le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), |
599ea31d | 2085 | ext4_ext_get_actual_len(ex), 0); |
8016e29f HS |
2086 | if (ret < 0) |
2087 | break; | |
2088 | ret = JBD2_FC_REPLAY_CONTINUE; | |
2089 | fallthrough; | |
2090 | case EXT4_FC_TAG_DEL_RANGE: | |
2091 | case EXT4_FC_TAG_LINK: | |
2092 | case EXT4_FC_TAG_UNLINK: | |
2093 | case EXT4_FC_TAG_CREAT: | |
2094 | case EXT4_FC_TAG_INODE: | |
2095 | case EXT4_FC_TAG_PAD: | |
2096 | state->fc_cur_tag++; | |
a7ba36bc | 2097 | state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, |
dcc58274 | 2098 | EXT4_FC_TAG_BASE_LEN + tl.fc_len); |
8016e29f HS |
2099 | break; |
2100 | case EXT4_FC_TAG_TAIL: | |
2101 | state->fc_cur_tag++; | |
a7ba36bc HS |
2102 | memcpy(&tail, val, sizeof(tail)); |
2103 | state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, | |
fdc2a3c7 | 2104 | EXT4_FC_TAG_BASE_LEN + |
8016e29f HS |
2105 | offsetof(struct ext4_fc_tail, |
2106 | fc_crc)); | |
a7ba36bc HS |
2107 | if (le32_to_cpu(tail.fc_tid) == expected_tid && |
2108 | le32_to_cpu(tail.fc_crc) == state->fc_crc) { | |
8016e29f HS |
2109 | state->fc_replay_num_tags = state->fc_cur_tag; |
2110 | state->fc_regions_valid = | |
2111 | state->fc_regions_used; | |
2112 | } else { | |
2113 | ret = state->fc_replay_num_tags ? | |
2114 | JBD2_FC_REPLAY_STOP : -EFSBADCRC; | |
2115 | } | |
2116 | state->fc_crc = 0; | |
2117 | break; | |
2118 | case EXT4_FC_TAG_HEAD: | |
a7ba36bc HS |
2119 | memcpy(&head, val, sizeof(head)); |
2120 | if (le32_to_cpu(head.fc_features) & | |
8016e29f HS |
2121 | ~EXT4_FC_SUPPORTED_FEATURES) { |
2122 | ret = -EOPNOTSUPP; | |
2123 | break; | |
2124 | } | |
a7ba36bc | 2125 | if (le32_to_cpu(head.fc_tid) != expected_tid) { |
8016e29f HS |
2126 | ret = JBD2_FC_REPLAY_STOP; |
2127 | break; | |
2128 | } | |
2129 | state->fc_cur_tag++; | |
a7ba36bc | 2130 | state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, |
dcc58274 | 2131 | EXT4_FC_TAG_BASE_LEN + tl.fc_len); |
8016e29f HS |
2132 | break; |
2133 | default: | |
2134 | ret = state->fc_replay_num_tags ? | |
2135 | JBD2_FC_REPLAY_STOP : -ECANCELED; | |
2136 | } | |
2137 | if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) | |
2138 | break; | |
2139 | } | |
2140 | ||
2141 | out_err: | |
2142 | trace_ext4_fc_replay_scan(sb, ret, off); | |
2143 | return ret; | |
2144 | } | |
2145 | ||
5b849b5f HS |
2146 | /* |
2147 | * Main recovery path entry point. | |
8016e29f | 2148 | * The meaning of return codes is similar as above. |
5b849b5f HS |
2149 | */ |
2150 | static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, | |
2151 | enum passtype pass, int off, tid_t expected_tid) | |
2152 | { | |
8016e29f HS |
2153 | struct super_block *sb = journal->j_private; |
2154 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
11768cfd | 2155 | struct ext4_fc_tl_mem tl; |
a7ba36bc | 2156 | __u8 *start, *end, *cur, *val; |
8016e29f HS |
2157 | int ret = JBD2_FC_REPLAY_CONTINUE; |
2158 | struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; | |
a7ba36bc | 2159 | struct ext4_fc_tail tail; |
8016e29f HS |
2160 | |
2161 | if (pass == PASS_SCAN) { | |
2162 | state->fc_current_pass = PASS_SCAN; | |
2163 | return ext4_fc_replay_scan(journal, bh, off, expected_tid); | |
2164 | } | |
2165 | ||
2166 | if (state->fc_current_pass != pass) { | |
2167 | state->fc_current_pass = pass; | |
2168 | sbi->s_mount_state |= EXT4_FC_REPLAY; | |
2169 | } | |
2170 | if (!sbi->s_fc_replay_state.fc_replay_num_tags) { | |
4978c659 | 2171 | ext4_debug("Replay stops\n"); |
8016e29f HS |
2172 | ext4_fc_set_bitmaps_and_counters(sb); |
2173 | return 0; | |
2174 | } | |
2175 | ||
2176 | #ifdef CONFIG_EXT4_DEBUG | |
2177 | if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { | |
2178 | pr_warn("Dropping fc block %d because max_replay set\n", off); | |
2179 | return JBD2_FC_REPLAY_STOP; | |
2180 | } | |
2181 | #endif | |
2182 | ||
2183 | start = (u8 *)bh->b_data; | |
48a6a66d | 2184 | end = start + journal->j_blocksize; |
8016e29f | 2185 | |
48a6a66d | 2186 | for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; |
dcc58274 YB |
2187 | cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { |
2188 | ext4_fc_get_tl(&tl, cur); | |
fdc2a3c7 | 2189 | val = cur + EXT4_FC_TAG_BASE_LEN; |
a7ba36bc | 2190 | |
8016e29f HS |
2191 | if (state->fc_replay_num_tags == 0) { |
2192 | ret = JBD2_FC_REPLAY_STOP; | |
2193 | ext4_fc_set_bitmaps_and_counters(sb); | |
2194 | break; | |
2195 | } | |
1b45cc5c | 2196 | |
dcc58274 | 2197 | ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); |
8016e29f | 2198 | state->fc_replay_num_tags--; |
dcc58274 | 2199 | switch (tl.fc_tag) { |
8016e29f | 2200 | case EXT4_FC_TAG_LINK: |
a7ba36bc | 2201 | ret = ext4_fc_replay_link(sb, &tl, val); |
8016e29f HS |
2202 | break; |
2203 | case EXT4_FC_TAG_UNLINK: | |
a7ba36bc | 2204 | ret = ext4_fc_replay_unlink(sb, &tl, val); |
8016e29f HS |
2205 | break; |
2206 | case EXT4_FC_TAG_ADD_RANGE: | |
a7ba36bc | 2207 | ret = ext4_fc_replay_add_range(sb, &tl, val); |
8016e29f HS |
2208 | break; |
2209 | case EXT4_FC_TAG_CREAT: | |
a7ba36bc | 2210 | ret = ext4_fc_replay_create(sb, &tl, val); |
8016e29f HS |
2211 | break; |
2212 | case EXT4_FC_TAG_DEL_RANGE: | |
a7ba36bc | 2213 | ret = ext4_fc_replay_del_range(sb, &tl, val); |
8016e29f HS |
2214 | break; |
2215 | case EXT4_FC_TAG_INODE: | |
a7ba36bc | 2216 | ret = ext4_fc_replay_inode(sb, &tl, val); |
8016e29f HS |
2217 | break; |
2218 | case EXT4_FC_TAG_PAD: | |
2219 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, | |
dcc58274 | 2220 | tl.fc_len, 0); |
8016e29f HS |
2221 | break; |
2222 | case EXT4_FC_TAG_TAIL: | |
dcc58274 YB |
2223 | trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, |
2224 | 0, tl.fc_len, 0); | |
a7ba36bc HS |
2225 | memcpy(&tail, val, sizeof(tail)); |
2226 | WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); | |
8016e29f HS |
2227 | break; |
2228 | case EXT4_FC_TAG_HEAD: | |
2229 | break; | |
2230 | default: | |
dcc58274 | 2231 | trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); |
8016e29f HS |
2232 | ret = -ECANCELED; |
2233 | break; | |
2234 | } | |
2235 | if (ret < 0) | |
2236 | break; | |
2237 | ret = JBD2_FC_REPLAY_CONTINUE; | |
2238 | } | |
2239 | return ret; | |
5b849b5f HS |
2240 | } |
2241 | ||
6866d7b3 HS |
2242 | void ext4_fc_init(struct super_block *sb, journal_t *journal) |
2243 | { | |
5b849b5f HS |
2244 | /* |
2245 | * We set replay callback even if fast commit disabled because we may | |
2246 | * could still have fast commit blocks that need to be replayed even if | |
2247 | * fast commit has now been turned off. | |
2248 | */ | |
2249 | journal->j_fc_replay_callback = ext4_fc_replay; | |
6866d7b3 HS |
2250 | if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) |
2251 | return; | |
ff780b91 | 2252 | journal->j_fc_cleanup_callback = ext4_fc_cleanup; |
6866d7b3 | 2253 | } |
aa75f4d3 | 2254 | |
0fbcb525 EB |
2255 | static const char * const fc_ineligible_reasons[] = { |
2256 | [EXT4_FC_REASON_XATTR] = "Extended attributes changed", | |
2257 | [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename", | |
2258 | [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed", | |
2259 | [EXT4_FC_REASON_NOMEM] = "Insufficient memory", | |
2260 | [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot", | |
2261 | [EXT4_FC_REASON_RESIZE] = "Resize", | |
2262 | [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed", | |
2263 | [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op", | |
2264 | [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling", | |
2265 | [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename", | |
ce8c59d1 HS |
2266 | }; |
2267 | ||
2268 | int ext4_fc_info_show(struct seq_file *seq, void *v) | |
2269 | { | |
2270 | struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); | |
2271 | struct ext4_fc_stats *stats = &sbi->s_fc_stats; | |
2272 | int i; | |
2273 | ||
2274 | if (v != SEQ_START_TOKEN) | |
2275 | return 0; | |
2276 | ||
2277 | seq_printf(seq, | |
2278 | "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", | |
2279 | stats->fc_num_commits, stats->fc_ineligible_commits, | |
2280 | stats->fc_numblks, | |
0915e464 | 2281 | div_u64(stats->s_fc_avg_commit_time, 1000)); |
ce8c59d1 HS |
2282 | seq_puts(seq, "Ineligible reasons:\n"); |
2283 | for (i = 0; i < EXT4_FC_REASON_MAX; i++) | |
2284 | seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], | |
2285 | stats->fc_ineligible_reason_count[i]); | |
2286 | ||
2287 | return 0; | |
2288 | } | |
2289 | ||
aa75f4d3 HS |
2290 | int __init ext4_fc_init_dentry_cache(void) |
2291 | { | |
2292 | ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, | |
2293 | SLAB_RECLAIM_ACCOUNT); | |
2294 | ||
2295 | if (ext4_fc_dentry_cachep == NULL) | |
2296 | return -ENOMEM; | |
2297 | ||
2298 | return 0; | |
2299 | } | |
ab047d51 SAS |
2300 | |
2301 | void ext4_fc_destroy_dentry_cache(void) | |
2302 | { | |
2303 | kmem_cache_destroy(ext4_fc_dentry_cachep); | |
2304 | } |