Commit | Line | Data |
---|---|---|
6f643c57 SR |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (c) 2022 Fujitsu. All Rights Reserved. | |
4 | */ | |
5 | ||
6 | #include "xfs.h" | |
7 | #include "xfs_shared.h" | |
8 | #include "xfs_format.h" | |
9 | #include "xfs_log_format.h" | |
10 | #include "xfs_trans_resv.h" | |
11 | #include "xfs_mount.h" | |
12 | #include "xfs_alloc.h" | |
13 | #include "xfs_bit.h" | |
14 | #include "xfs_btree.h" | |
15 | #include "xfs_inode.h" | |
16 | #include "xfs_icache.h" | |
17 | #include "xfs_rmap.h" | |
18 | #include "xfs_rmap_btree.h" | |
19 | #include "xfs_rtalloc.h" | |
20 | #include "xfs_trans.h" | |
6614a3c3 | 21 | #include "xfs_ag.h" |
6f643c57 SR |
22 | |
23 | #include <linux/mm.h> | |
24 | #include <linux/dax.h> | |
fa422b35 | 25 | #include <linux/fs.h> |
6f643c57 | 26 | |
e033f40b | 27 | struct xfs_failure_info { |
6f643c57 SR |
28 | xfs_agblock_t startblock; |
29 | xfs_extlen_t blockcount; | |
30 | int mf_flags; | |
e033f40b | 31 | bool want_shutdown; |
6f643c57 SR |
32 | }; |
33 | ||
34 | static pgoff_t | |
35 | xfs_failure_pgoff( | |
36 | struct xfs_mount *mp, | |
37 | const struct xfs_rmap_irec *rec, | |
e033f40b | 38 | const struct xfs_failure_info *notify) |
6f643c57 SR |
39 | { |
40 | loff_t pos = XFS_FSB_TO_B(mp, rec->rm_offset); | |
41 | ||
42 | if (notify->startblock > rec->rm_startblock) | |
43 | pos += XFS_FSB_TO_B(mp, | |
44 | notify->startblock - rec->rm_startblock); | |
45 | return pos >> PAGE_SHIFT; | |
46 | } | |
47 | ||
48 | static unsigned long | |
49 | xfs_failure_pgcnt( | |
50 | struct xfs_mount *mp, | |
51 | const struct xfs_rmap_irec *rec, | |
e033f40b | 52 | const struct xfs_failure_info *notify) |
6f643c57 SR |
53 | { |
54 | xfs_agblock_t end_rec; | |
55 | xfs_agblock_t end_notify; | |
56 | xfs_agblock_t start_cross; | |
57 | xfs_agblock_t end_cross; | |
58 | ||
59 | start_cross = max(rec->rm_startblock, notify->startblock); | |
60 | ||
61 | end_rec = rec->rm_startblock + rec->rm_blockcount; | |
62 | end_notify = notify->startblock + notify->blockcount; | |
63 | end_cross = min(end_rec, end_notify); | |
64 | ||
65 | return XFS_FSB_TO_B(mp, end_cross - start_cross) >> PAGE_SHIFT; | |
66 | } | |
67 | ||
68 | static int | |
69 | xfs_dax_failure_fn( | |
70 | struct xfs_btree_cur *cur, | |
71 | const struct xfs_rmap_irec *rec, | |
72 | void *data) | |
73 | { | |
74 | struct xfs_mount *mp = cur->bc_mp; | |
75 | struct xfs_inode *ip; | |
e033f40b | 76 | struct xfs_failure_info *notify = data; |
fa422b35 SR |
77 | struct address_space *mapping; |
78 | pgoff_t pgoff; | |
79 | unsigned long pgcnt; | |
6f643c57 SR |
80 | int error = 0; |
81 | ||
82 | if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || | |
83 | (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) { | |
fa422b35 SR |
84 | /* Continue the query because this isn't a failure. */ |
85 | if (notify->mf_flags & MF_MEM_PRE_REMOVE) | |
86 | return 0; | |
e033f40b DW |
87 | notify->want_shutdown = true; |
88 | return 0; | |
6f643c57 SR |
89 | } |
90 | ||
91 | /* Get files that incore, filter out others that are not in use. */ | |
92 | error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE, | |
93 | 0, &ip); | |
94 | /* Continue the rmap query if the inode isn't incore */ | |
95 | if (error == -ENODATA) | |
96 | return 0; | |
e033f40b DW |
97 | if (error) { |
98 | notify->want_shutdown = true; | |
99 | return 0; | |
100 | } | |
6f643c57 | 101 | |
fa422b35 SR |
102 | mapping = VFS_I(ip)->i_mapping; |
103 | pgoff = xfs_failure_pgoff(mp, rec, notify); | |
104 | pgcnt = xfs_failure_pgcnt(mp, rec, notify); | |
105 | ||
106 | /* Continue the rmap query if the inode isn't a dax file. */ | |
107 | if (dax_mapping(mapping)) | |
108 | error = mf_dax_kill_procs(mapping, pgoff, pgcnt, | |
109 | notify->mf_flags); | |
110 | ||
111 | /* Invalidate the cache in dax pages. */ | |
112 | if (notify->mf_flags & MF_MEM_PRE_REMOVE) | |
113 | invalidate_inode_pages2_range(mapping, pgoff, | |
114 | pgoff + pgcnt - 1); | |
115 | ||
6f643c57 SR |
116 | xfs_irele(ip); |
117 | return error; | |
118 | } | |
119 | ||
fa422b35 SR |
120 | static int |
121 | xfs_dax_notify_failure_freeze( | |
122 | struct xfs_mount *mp) | |
123 | { | |
124 | struct super_block *sb = mp->m_super; | |
125 | int error; | |
126 | ||
127 | error = freeze_super(sb, FREEZE_HOLDER_KERNEL); | |
128 | if (error) | |
129 | xfs_emerg(mp, "already frozen by kernel, err=%d", error); | |
130 | ||
131 | return error; | |
132 | } | |
133 | ||
134 | static void | |
135 | xfs_dax_notify_failure_thaw( | |
136 | struct xfs_mount *mp, | |
137 | bool kernel_frozen) | |
138 | { | |
139 | struct super_block *sb = mp->m_super; | |
140 | int error; | |
141 | ||
142 | if (kernel_frozen) { | |
143 | error = thaw_super(sb, FREEZE_HOLDER_KERNEL); | |
144 | if (error) | |
145 | xfs_emerg(mp, "still frozen after notify failure, err=%d", | |
146 | error); | |
147 | } | |
148 | ||
149 | /* | |
150 | * Also thaw userspace call anyway because the device is about to be | |
151 | * removed immediately. | |
152 | */ | |
153 | thaw_super(sb, FREEZE_HOLDER_USERSPACE); | |
154 | } | |
155 | ||
6f643c57 SR |
156 | static int |
157 | xfs_dax_notify_ddev_failure( | |
158 | struct xfs_mount *mp, | |
159 | xfs_daddr_t daddr, | |
160 | xfs_daddr_t bblen, | |
161 | int mf_flags) | |
162 | { | |
e033f40b | 163 | struct xfs_failure_info notify = { .mf_flags = mf_flags }; |
6f643c57 SR |
164 | struct xfs_trans *tp = NULL; |
165 | struct xfs_btree_cur *cur = NULL; | |
166 | struct xfs_buf *agf_bp = NULL; | |
167 | int error = 0; | |
fa422b35 | 168 | bool kernel_frozen = false; |
6f643c57 SR |
169 | xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr); |
170 | xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); | |
5cf32f63 SR |
171 | xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, |
172 | daddr + bblen - 1); | |
6f643c57 SR |
173 | xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno); |
174 | ||
fa422b35 SR |
175 | if (mf_flags & MF_MEM_PRE_REMOVE) { |
176 | xfs_info(mp, "Device is about to be removed!"); | |
177 | /* | |
178 | * Freeze fs to prevent new mappings from being created. | |
179 | * - Keep going on if others already hold the kernel forzen. | |
180 | * - Keep going on if other errors too because this device is | |
181 | * starting to fail. | |
182 | * - If kernel frozen state is hold successfully here, thaw it | |
183 | * here as well at the end. | |
184 | */ | |
185 | kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0; | |
186 | } | |
187 | ||
6f643c57 SR |
188 | error = xfs_trans_alloc_empty(mp, &tp); |
189 | if (error) | |
fa422b35 | 190 | goto out; |
6f643c57 SR |
191 | |
192 | for (; agno <= end_agno; agno++) { | |
193 | struct xfs_rmap_irec ri_low = { }; | |
194 | struct xfs_rmap_irec ri_high; | |
6f643c57 | 195 | struct xfs_agf *agf; |
6614a3c3 | 196 | struct xfs_perag *pag; |
3c90c01e | 197 | xfs_agblock_t range_agend; |
6f643c57 | 198 | |
6614a3c3 LT |
199 | pag = xfs_perag_get(mp, agno); |
200 | error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); | |
201 | if (error) { | |
202 | xfs_perag_put(pag); | |
6f643c57 | 203 | break; |
6614a3c3 | 204 | } |
6f643c57 | 205 | |
6614a3c3 | 206 | cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag); |
6f643c57 SR |
207 | |
208 | /* | |
209 | * Set the rmap range from ri_low to ri_high, which represents | |
210 | * a [start, end] where we looking for the files or metadata. | |
211 | */ | |
212 | memset(&ri_high, 0xFF, sizeof(ri_high)); | |
213 | ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno); | |
214 | if (agno == end_agno) | |
215 | ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno); | |
216 | ||
217 | agf = agf_bp->b_addr; | |
3c90c01e | 218 | range_agend = min(be32_to_cpu(agf->agf_length) - 1, |
6f643c57 SR |
219 | ri_high.rm_startblock); |
220 | notify.startblock = ri_low.rm_startblock; | |
3c90c01e | 221 | notify.blockcount = range_agend + 1 - ri_low.rm_startblock; |
6f643c57 SR |
222 | |
223 | error = xfs_rmap_query_range(cur, &ri_low, &ri_high, | |
224 | xfs_dax_failure_fn, ¬ify); | |
225 | xfs_btree_del_cursor(cur, error); | |
226 | xfs_trans_brelse(tp, agf_bp); | |
6614a3c3 | 227 | xfs_perag_put(pag); |
6f643c57 SR |
228 | if (error) |
229 | break; | |
230 | ||
231 | fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0); | |
232 | } | |
233 | ||
234 | xfs_trans_cancel(tp); | |
fa422b35 SR |
235 | |
236 | /* | |
237 | * Shutdown fs from a force umount in pre-remove case which won't fail, | |
238 | * so errors can be ignored. Otherwise, shutdown the filesystem with | |
239 | * CORRUPT flag if error occured or notify.want_shutdown was set during | |
240 | * RMAP querying. | |
241 | */ | |
242 | if (mf_flags & MF_MEM_PRE_REMOVE) | |
243 | xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); | |
244 | else if (error || notify.want_shutdown) { | |
e033f40b DW |
245 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); |
246 | if (!error) | |
247 | error = -EFSCORRUPTED; | |
248 | } | |
fa422b35 SR |
249 | |
250 | out: | |
251 | /* Thaw the fs if it has been frozen before. */ | |
252 | if (mf_flags & MF_MEM_PRE_REMOVE) | |
253 | xfs_dax_notify_failure_thaw(mp, kernel_frozen); | |
254 | ||
6f643c57 SR |
255 | return error; |
256 | } | |
257 | ||
258 | static int | |
259 | xfs_dax_notify_failure( | |
260 | struct dax_device *dax_dev, | |
261 | u64 offset, | |
262 | u64 len, | |
263 | int mf_flags) | |
264 | { | |
265 | struct xfs_mount *mp = dax_holder(dax_dev); | |
266 | u64 ddev_start; | |
267 | u64 ddev_end; | |
268 | ||
fd63612a | 269 | if (!(mp->m_super->s_flags & SB_BORN)) { |
6f643c57 SR |
270 | xfs_warn(mp, "filesystem is not ready for notify_failure()!"); |
271 | return -EIO; | |
272 | } | |
273 | ||
274 | if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) { | |
b14d067e | 275 | xfs_debug(mp, |
6f643c57 SR |
276 | "notify_failure() not supported on realtime device!"); |
277 | return -EOPNOTSUPP; | |
278 | } | |
279 | ||
280 | if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev && | |
281 | mp->m_logdev_targp != mp->m_ddev_targp) { | |
fa422b35 SR |
282 | /* |
283 | * In the pre-remove case the failure notification is attempting | |
284 | * to trigger a force unmount. The expectation is that the | |
285 | * device is still present, but its removal is in progress and | |
286 | * can not be cancelled, proceed with accessing the log device. | |
287 | */ | |
288 | if (mf_flags & MF_MEM_PRE_REMOVE) | |
289 | return 0; | |
6f643c57 SR |
290 | xfs_err(mp, "ondisk log corrupt, shutting down fs!"); |
291 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); | |
292 | return -EFSCORRUPTED; | |
293 | } | |
294 | ||
295 | if (!xfs_has_rmapbt(mp)) { | |
b14d067e | 296 | xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); |
6f643c57 SR |
297 | return -EOPNOTSUPP; |
298 | } | |
299 | ||
300 | ddev_start = mp->m_ddev_targp->bt_dax_part_off; | |
301 | ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1; | |
302 | ||
fa422b35 SR |
303 | /* Notify failure on the whole device. */ |
304 | if (offset == 0 && len == U64_MAX) { | |
305 | offset = ddev_start; | |
306 | len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev); | |
307 | } | |
308 | ||
6f643c57 | 309 | /* Ignore the range out of filesystem area */ |
5cf32f63 | 310 | if (offset + len - 1 < ddev_start) |
6f643c57 SR |
311 | return -ENXIO; |
312 | if (offset > ddev_end) | |
313 | return -ENXIO; | |
314 | ||
315 | /* Calculate the real range when it touches the boundary */ | |
316 | if (offset > ddev_start) | |
317 | offset -= ddev_start; | |
318 | else { | |
319 | len -= ddev_start - offset; | |
320 | offset = 0; | |
321 | } | |
5cf32f63 SR |
322 | if (offset + len - 1 > ddev_end) |
323 | len = ddev_end - offset + 1; | |
6f643c57 SR |
324 | |
325 | return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len), | |
326 | mf_flags); | |
327 | } | |
328 | ||
329 | const struct dax_holder_operations xfs_dax_holder_operations = { | |
330 | .notify_failure = xfs_dax_notify_failure, | |
331 | }; |