fs/erofs/zmap.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018-2019 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  */
6 #include "internal.h"
7 #include <asm/unaligned.h>
8 #include <trace/events/erofs.h>
9
10 struct z_erofs_maprecorder {
11         struct inode *inode;
12         struct erofs_map_blocks *map;
13         void *kaddr;
14
15         unsigned long lcn;
16         /* compression extent information gathered */
17         u8  type, headtype;
18         u16 clusterofs;
19         u16 delta[2];
20         erofs_blk_t pblk, compressedblks;
21         erofs_off_t nextpackoff;
22         bool partialref;
23 };
24
25 static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
26                                          unsigned long lcn)
27 {
28         struct inode *const inode = m->inode;
29         struct erofs_inode *const vi = EROFS_I(inode);
30         const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) +
31                         vi->inode_isize + vi->xattr_isize) +
32                         lcn * sizeof(struct z_erofs_lcluster_index);
33         struct z_erofs_lcluster_index *di;
34         unsigned int advise, type;
35
36         m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
37                                       erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
38         if (IS_ERR(m->kaddr))
39                 return PTR_ERR(m->kaddr);
40
41         m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
42         m->lcn = lcn;
43         di = m->kaddr + erofs_blkoff(inode->i_sb, pos);
44
45         advise = le16_to_cpu(di->di_advise);
46         type = (advise >> Z_EROFS_LI_LCLUSTER_TYPE_BIT) &
47                 ((1 << Z_EROFS_LI_LCLUSTER_TYPE_BITS) - 1);
48         switch (type) {
49         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
50                 m->clusterofs = 1 << vi->z_logical_clusterbits;
51                 m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
52                 if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
53                         if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
54                                         Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
55                                 DBG_BUGON(1);
56                                 return -EFSCORRUPTED;
57                         }
58                         m->compressedblks = m->delta[0] &
59                                 ~Z_EROFS_LI_D0_CBLKCNT;
60                         m->delta[0] = 1;
61                 }
62                 m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
63                 break;
64         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
65         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
66         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
67                 if (advise & Z_EROFS_LI_PARTIAL_REF)
68                         m->partialref = true;
69                 m->clusterofs = le16_to_cpu(di->di_clusterofs);
70                 if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
71                         DBG_BUGON(1);
72                         return -EFSCORRUPTED;
73                 }
74                 m->pblk = le32_to_cpu(di->di_u.blkaddr);
75                 break;
76         default:
77                 DBG_BUGON(1);
78                 return -EOPNOTSUPP;
79         }
80         m->type = type;
81         return 0;
82 }
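/*
 * For reference, a minimal sketch of one full ("legacy") lcluster index as
 * accessed above; the authoritative definition lives in erofs_fs.h, so
 * treat this as an illustration only:
 *
 *	struct z_erofs_lcluster_index {	  (8 bytes per logical cluster)
 *		__le16 di_advise;	  lcluster type + PARTIAL_REF bit
 *		__le16 di_clusterofs;	  decompressed offset within the lcluster
 *		union {
 *			__le32 blkaddr;	  pblk for PLAIN/HEAD lclusters
 *			__le16 delta[2];  lookback/lookahead for NONHEAD
 *		} di_u;
 *	};
 *
 * hence `pos` above advances by sizeof(struct z_erofs_lcluster_index) per lcn.
 */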
83
84 static unsigned int decode_compactedbits(unsigned int lobits,
85                                          unsigned int lomask,
86                                          u8 *in, unsigned int pos, u8 *type)
87 {
88         const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
89         const unsigned int lo = v & lomask;
90
91         *type = (v >> lobits) & 3;
92         return lo;
93 }
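/*
 * Bit layout illustration (values derived from the callers in this file):
 * each compacted entry is `encodebits` wide; the low `lobits` bits hold
 * `lo` (clusterofs or delta) and the two bits right above hold the type.
 *
 *	e.g. lclusterbits = 12, compacted 2B pack (vcnt = 16, 32 bytes):
 *		encodebits = ((16 << 1) - sizeof(__le32)) * 8 / 16 = 14
 *		bits [11:0] -> lo, bits [13:12] -> type
 *
 * get_unaligned_le32() always covers a whole entry: pos & 7 <= 7, so at
 * least 25 bits remain after the shift, which exceeds any encodebits used.
 */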
94
95 static int get_compacted_la_distance(unsigned int lclusterbits,
96                                      unsigned int encodebits,
97                                      unsigned int vcnt, u8 *in, int i)
98 {
99         const unsigned int lomask = (1 << lclusterbits) - 1;
100         unsigned int lo, d1 = 0;
101         u8 type;
102
103         DBG_BUGON(i >= vcnt);
104
105         do {
106                 lo = decode_compactedbits(lclusterbits, lomask,
107                                           in, encodebits * i, &type);
108
109                 if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
110                         return d1;
111                 ++d1;
112         } while (++i < vcnt);
113
114         /* the last lcluster (vcnt - 1) is NONHEAD; its lo stores delta[1] */
115         if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
116                 d1 += lo - 1;
117         return d1;
118 }
119
120 static int unpack_compacted_index(struct z_erofs_maprecorder *m,
121                                   unsigned int amortizedshift,
122                                   erofs_off_t pos, bool lookahead)
123 {
124         struct erofs_inode *const vi = EROFS_I(m->inode);
125         const unsigned int lclusterbits = vi->z_logical_clusterbits;
126         const unsigned int lomask = (1 << lclusterbits) - 1;
127         unsigned int vcnt, base, lo, encodebits, nblk, eofs;
128         int i;
129         u8 *in, type;
130         bool big_pcluster;
131
132         if (1 << amortizedshift == 4)
133                 vcnt = 2;
134         else if (1 << amortizedshift == 2 && lclusterbits == 12)
135                 vcnt = 16;
136         else
137                 return -EOPNOTSUPP;
138
139         /* unlike round_up(), this advances even when pos is already aligned */
140         m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
141                          (vcnt << amortizedshift);
142         big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
143         encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
144         eofs = erofs_blkoff(m->inode->i_sb, pos);
145         base = round_down(eofs, vcnt << amortizedshift);
146         in = m->kaddr + base;
147
148         i = (eofs - base) >> amortizedshift;
149
150         lo = decode_compactedbits(lclusterbits, lomask,
151                                   in, encodebits * i, &type);
152         m->type = type;
153         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
154                 m->clusterofs = 1 << lclusterbits;
155
156                 /* figure out lookahead_distance: delta[1] if needed */
157                 if (lookahead)
158                         m->delta[1] = get_compacted_la_distance(lclusterbits,
159                                                 encodebits, vcnt, in, i);
160                 if (lo & Z_EROFS_LI_D0_CBLKCNT) {
161                         if (!big_pcluster) {
162                                 DBG_BUGON(1);
163                                 return -EFSCORRUPTED;
164                         }
165                         m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
166                         m->delta[0] = 1;
167                         return 0;
168                 } else if (i + 1 != (int)vcnt) {
169                         m->delta[0] = lo;
170                         return 0;
171                 }
172                 /*
173                  * The last lcluster in the pack is special: its lo saves
174                  * delta[1] rather than delta[0].  Hence, derive delta[0]
175                  * from the previous lcluster instead.
176                  */
177                 lo = decode_compactedbits(lclusterbits, lomask,
178                                           in, encodebits * (i - 1), &type);
179                 if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
180                         lo = 0;
181                 else if (lo & Z_EROFS_LI_D0_CBLKCNT)
182                         lo = 1;
183                 m->delta[0] = lo + 1;
184                 return 0;
185         }
186         m->clusterofs = lo;
187         m->delta[0] = 0;
188         /* figure out the blkaddr (pblk) for HEAD lclusters */
189         if (!big_pcluster) {
190                 nblk = 1;
191                 while (i > 0) {
192                         --i;
193                         lo = decode_compactedbits(lclusterbits, lomask,
194                                                   in, encodebits * i, &type);
195                         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
196                                 i -= lo;
197
198                         if (i >= 0)
199                                 ++nblk;
200                 }
201         } else {
202                 nblk = 0;
203                 while (i > 0) {
204                         --i;
205                         lo = decode_compactedbits(lclusterbits, lomask,
206                                                   in, encodebits * i, &type);
207                         if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
208                                 if (lo & Z_EROFS_LI_D0_CBLKCNT) {
209                                         --i;
210                                         nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
211                                         continue;
212                                 }
213                                 /* big pclusters shouldn't have a plain d0 == 1 */
214                                 if (lo <= 1) {
215                                         DBG_BUGON(1);
216                                         return -EFSCORRUPTED;
217                                 }
218                                 i -= lo - 2;
219                                 continue;
220                         }
221                         ++nblk;
222                 }
223         }
224         in += (vcnt << amortizedshift) - sizeof(__le32);
225         m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
226         return 0;
227 }
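/*
 * A rough sketch of one compacted index pack as decoded above (derived
 * from this code rather than from the on-disk format spec):
 *
 *	|<--------- vcnt << amortizedshift bytes --------->|
 *	[ vcnt entries of `encodebits` bits each | __le32 blkaddr ]
 *
 * The trailing __le32 is the blkaddr the pack is relative to; the pblk of
 * a HEAD lcluster is that value plus nblk, the number of physical blocks
 * consumed by the preceding lclusters of the same pack.
 */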
228
229 static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
230                                             unsigned long lcn, bool lookahead)
231 {
232         struct inode *const inode = m->inode;
233         struct erofs_inode *const vi = EROFS_I(inode);
234         const unsigned int lclusterbits = vi->z_logical_clusterbits;
235         const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
236                 ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
237         unsigned int totalidx = erofs_iblks(inode);
238         unsigned int compacted_4b_initial, compacted_2b;
239         unsigned int amortizedshift;
240         erofs_off_t pos;
241
242         if (lclusterbits != 12)
243                 return -EOPNOTSUPP;
244
245         if (lcn >= totalidx)
246                 return -EINVAL;
247
248         m->lcn = lcn;
249         /* number of 4-byte units needed to reach 32-byte (compacted_2b) alignment */
250         compacted_4b_initial = (32 - ebase % 32) / 4;
251         if (compacted_4b_initial == 32 / 4)
252                 compacted_4b_initial = 0;
253
254         if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
255             compacted_4b_initial < totalidx)
256                 compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
257         else
258                 compacted_2b = 0;
259
260         pos = ebase;
261         if (lcn < compacted_4b_initial) {
262                 amortizedshift = 2;
263                 goto out;
264         }
265         pos += compacted_4b_initial * 4;
266         lcn -= compacted_4b_initial;
267
268         if (lcn < compacted_2b) {
269                 amortizedshift = 1;
270                 goto out;
271         }
272         pos += compacted_2b * 2;
273         lcn -= compacted_2b;
274         amortizedshift = 2;
275 out:
276         pos += lcn * (1 << amortizedshift);
277         m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
278                                       erofs_blknr(inode->i_sb, pos), EROFS_KMAP);
279         if (IS_ERR(m->kaddr))
280                 return PTR_ERR(m->kaddr);
281         return unpack_compacted_index(m, amortizedshift, pos, lookahead);
282 }
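/*
 * Layout of the compacted index area as implied by the offset math above
 * (a sketch for orientation only):
 *
 *	ebase: map header end, 8-byte aligned
 *	[ compacted_4b_initial lclusters, 4B each, padding up to 32B ]
 *	[ compacted_2b lclusters, 2B each, only with ADVISE_COMPACTED_2B ]
 *	[ remaining lclusters, 4B each ]
 *
 * so for an lcn in the trailing region,
 *	pos = ebase + 4 * compacted_4b_initial + 2 * compacted_2b
 *	      + 4 * (lcn - compacted_4b_initial - compacted_2b).
 */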
283
284 static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
285                                           unsigned int lcn, bool lookahead)
286 {
287         const unsigned int datamode = EROFS_I(m->inode)->datalayout;
288
289         if (datamode == EROFS_INODE_COMPRESSED_FULL)
290                 return legacy_load_cluster_from_disk(m, lcn);
291
292         if (datamode == EROFS_INODE_COMPRESSED_COMPACT)
293                 return compacted_load_cluster_from_disk(m, lcn, lookahead);
294
295         return -EINVAL;
296 }
297
298 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
299                                    unsigned int lookback_distance)
300 {
301         struct erofs_inode *const vi = EROFS_I(m->inode);
302         const unsigned int lclusterbits = vi->z_logical_clusterbits;
303
304         while (m->lcn >= lookback_distance) {
305                 unsigned long lcn = m->lcn - lookback_distance;
306                 int err;
307
308                 /* load extent head logical cluster if needed */
309                 err = z_erofs_load_cluster_from_disk(m, lcn, false);
310                 if (err)
311                         return err;
312
313                 switch (m->type) {
314                 case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
315                         if (!m->delta[0]) {
316                                 erofs_err(m->inode->i_sb,
317                                           "invalid lookback distance 0 @ nid %llu",
318                                           vi->nid);
319                                 DBG_BUGON(1);
320                                 return -EFSCORRUPTED;
321                         }
322                         lookback_distance = m->delta[0];
323                         continue;
324                 case Z_EROFS_LCLUSTER_TYPE_PLAIN:
325                 case Z_EROFS_LCLUSTER_TYPE_HEAD1:
326                 case Z_EROFS_LCLUSTER_TYPE_HEAD2:
327                         m->headtype = m->type;
328                         m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
329                         return 0;
330                 default:
331                         erofs_err(m->inode->i_sb,
332                                   "unknown type %u @ lcn %lu of nid %llu",
333                                   m->type, lcn, vi->nid);
334                         DBG_BUGON(1);
335                         return -EOPNOTSUPP;
336                 }
337         }
338
339         erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
340                   vi->nid);
341         DBG_BUGON(1);
342         return -EFSCORRUPTED;
343 }
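/*
 * Worked example with hypothetical numbers: if lcn 7 is NONHEAD with
 * delta[0] == 2, the head is searched for at lcn 5; if lcn 5 is NONHEAD
 * again with delta[0] == 3, the walk continues at lcn 2, and so on until
 * a PLAIN/HEAD1/HEAD2 lcluster is reached, whose
 * (lcn << lclusterbits) | clusterofs then becomes map->m_la.
 */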
344
345 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
346                                             unsigned int initial_lcn)
347 {
348         struct super_block *sb = m->inode->i_sb;
349         struct erofs_inode *const vi = EROFS_I(m->inode);
350         struct erofs_map_blocks *const map = m->map;
351         const unsigned int lclusterbits = vi->z_logical_clusterbits;
352         unsigned long lcn;
353         int err;
354
355         DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
356                   m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
357                   m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
358         DBG_BUGON(m->type != m->headtype);
359
360         if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
361             ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
362              !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
363             ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
364              !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
365                 map->m_plen = 1ULL << lclusterbits;
366                 return 0;
367         }
368         lcn = m->lcn + 1;
369         if (m->compressedblks)
370                 goto out;
371
372         err = z_erofs_load_cluster_from_disk(m, lcn, false);
373         if (err)
374                 return err;
375
376         /*
377          * If the 1st NONHEAD lcluster was already handled initially without
378          * valid compressedblks, it must not be a CBLKCNT entry; otherwise an
379          * internal implementation error has been detected.
380          *
381          * The code below handles that case properly anyway, but BUG_ON in
382          * debugging mode so that developers notice it.
383          */
384         DBG_BUGON(lcn == initial_lcn &&
385                   m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
386
387         switch (m->type) {
388         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
389         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
390         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
391                 /*
392                  * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
393                  * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
394                  */
395                 m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
396                 break;
397         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
398                 if (m->delta[0] != 1)
399                         goto err_bonus_cblkcnt;
400                 if (m->compressedblks)
401                         break;
402                 fallthrough;
403         default:
404                 erofs_err(m->inode->i_sb,
405                           "cannot find CBLKCNT @ lcn %lu of nid %llu",
406                           lcn, vi->nid);
407                 DBG_BUGON(1);
408                 return -EFSCORRUPTED;
409         }
410 out:
411         map->m_plen = erofs_pos(sb, m->compressedblks);
412         return 0;
413 err_bonus_cblkcnt:
414         erofs_err(m->inode->i_sb,
415                   "bogus CBLKCNT @ lcn %lu of nid %llu",
416                   lcn, vi->nid);
417         DBG_BUGON(1);
418         return -EFSCORRUPTED;
419 }
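/*
 * Example with hypothetical numbers: with 4KiB blocks and
 * lclusterbits == 12, a head without the big pcluster feature always
 * yields m_plen = 1 << 12 = 4KiB, while a big pcluster whose CBLKCNT
 * lcluster records compressedblks == 4 yields
 * m_plen = erofs_pos(sb, 4) = 16KiB.
 */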
420
421 static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
422 {
423         struct inode *inode = m->inode;
424         struct erofs_inode *vi = EROFS_I(inode);
425         struct erofs_map_blocks *map = m->map;
426         unsigned int lclusterbits = vi->z_logical_clusterbits;
427         u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
428         int err;
429
430         do {
431                 /* handle the last EOF pcluster (no next HEAD lcluster) */
432                 if ((lcn << lclusterbits) >= inode->i_size) {
433                         map->m_llen = inode->i_size - map->m_la;
434                         return 0;
435                 }
436
437                 err = z_erofs_load_cluster_from_disk(m, lcn, true);
438                 if (err)
439                         return err;
440
441                 if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
442                         DBG_BUGON(!m->delta[1] &&
443                                   m->clusterofs != 1 << lclusterbits);
444                 } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
445                            m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 ||
446                            m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
447                         /* go on until the next HEAD lcluster */
448                         if (lcn != headlcn)
449                                 break;
450                         m->delta[1] = 1;
451                 } else {
452                         erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
453                                   m->type, lcn, vi->nid);
454                         DBG_BUGON(1);
455                         return -EOPNOTSUPP;
456                 }
457                 lcn += m->delta[1];
458         } while (m->delta[1]);
459
460         map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
461         return 0;
462 }
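/*
 * Illustration with hypothetical numbers: with the head at lcn 4 and
 * lclusterbits == 12, a NONHEAD lcn 5 carrying delta[1] == 3 jumps the
 * walk straight to lcn 8; if a new HEAD lcluster is met there, m_llen
 * becomes (8 << 12) + clusterofs - m_la, i.e. the decompressed extent
 * ends exactly where the next pcluster begins.
 */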
463
464 static int z_erofs_do_map_blocks(struct inode *inode,
465                                  struct erofs_map_blocks *map, int flags)
466 {
467         struct erofs_inode *const vi = EROFS_I(inode);
468         bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
469         bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
470         struct z_erofs_maprecorder m = {
471                 .inode = inode,
472                 .map = map,
473         };
474         int err = 0;
475         unsigned int lclusterbits, endoff;
476         unsigned long initial_lcn;
477         unsigned long long ofs, end;
478
479         lclusterbits = vi->z_logical_clusterbits;
480         ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
481         initial_lcn = ofs >> lclusterbits;
482         endoff = ofs & ((1 << lclusterbits) - 1);
483
484         err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
485         if (err)
486                 goto unmap_out;
487
488         if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
489                 vi->z_idataoff = m.nextpackoff;
490
491         map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
492         end = (m.lcn + 1ULL) << lclusterbits;
493
494         switch (m.type) {
495         case Z_EROFS_LCLUSTER_TYPE_PLAIN:
496         case Z_EROFS_LCLUSTER_TYPE_HEAD1:
497         case Z_EROFS_LCLUSTER_TYPE_HEAD2:
498                 if (endoff >= m.clusterofs) {
499                         m.headtype = m.type;
500                         map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
501                         /*
502                          * For ztailpacking files, in order to inline data more
503                          * effectively, special EOF lclusters are now supported,
504                          * which can consist of three parts at most.
505                          */
506                         if (ztailpacking && end > inode->i_size)
507                                 end = inode->i_size;
508                         break;
509                 }
510                 /* m.lcn should be >= 1 if endoff < m.clusterofs */
511                 if (!m.lcn) {
512                         erofs_err(inode->i_sb,
513                                   "invalid logical cluster 0 at nid %llu",
514                                   vi->nid);
515                         err = -EFSCORRUPTED;
516                         goto unmap_out;
517                 }
518                 end = (m.lcn << lclusterbits) | m.clusterofs;
519                 map->m_flags |= EROFS_MAP_FULL_MAPPED;
520                 m.delta[0] = 1;
521                 fallthrough;
522         case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
523                 /* get the corresponding first chunk */
524                 err = z_erofs_extent_lookback(&m, m.delta[0]);
525                 if (err)
526                         goto unmap_out;
527                 break;
528         default:
529                 erofs_err(inode->i_sb,
530                           "unknown type %u @ offset %llu of nid %llu",
531                           m.type, ofs, vi->nid);
532                 err = -EOPNOTSUPP;
533                 goto unmap_out;
534         }
535         if (m.partialref)
536                 map->m_flags |= EROFS_MAP_PARTIAL_REF;
537         map->m_llen = end - map->m_la;
538
539         if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
540                 vi->z_tailextent_headlcn = m.lcn;
541                 /* for non-compact indexes, fragmentoff is 64 bits */
542                 if (fragment &&
543                     vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
544                         vi->z_fragmentoff |= (u64)m.pblk << 32;
545         }
546         if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
547                 map->m_flags |= EROFS_MAP_META;
548                 map->m_pa = vi->z_idataoff;
549                 map->m_plen = vi->z_idata_size;
550         } else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
551                 map->m_flags |= EROFS_MAP_FRAGMENT;
552         } else {
553                 map->m_pa = erofs_pos(inode->i_sb, m.pblk);
554                 err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
555                 if (err)
556                         goto unmap_out;
557         }
558
559         if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
560                 if (map->m_llen > map->m_plen) {
561                         DBG_BUGON(1);
562                         err = -EFSCORRUPTED;
563                         goto unmap_out;
564                 }
565                 if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
566                         map->m_algorithmformat =
567                                 Z_EROFS_COMPRESSION_INTERLACED;
568                 else
569                         map->m_algorithmformat =
570                                 Z_EROFS_COMPRESSION_SHIFTED;
571         } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
572                 map->m_algorithmformat = vi->z_algorithmtype[1];
573         } else {
574                 map->m_algorithmformat = vi->z_algorithmtype[0];
575         }
576
577         if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
578             ((flags & EROFS_GET_BLOCKS_READMORE) &&
579              map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
580              map->m_llen >= i_blocksize(inode))) {
581                 err = z_erofs_get_extent_decompressedlen(&m);
582                 if (!err)
583                         map->m_flags |= EROFS_MAP_FULL_MAPPED;
584         }
585
586 unmap_out:
587         erofs_unmap_metabuf(&m.map->buf);
588         return err;
589 }
590
591 static int z_erofs_fill_inode_lazy(struct inode *inode)
592 {
593         struct erofs_inode *const vi = EROFS_I(inode);
594         struct super_block *const sb = inode->i_sb;
595         int err, headnr;
596         erofs_off_t pos;
597         struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
598         void *kaddr;
599         struct z_erofs_map_header *h;
600
601         if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
602                 /*
603                  * paired with the smp_mb() at the end of this function to
604                  * ensure the fields below are fully observed once the bit is set.
605                  */
606                 smp_mb();
607                 return 0;
608         }
609
610         if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
611                 return -ERESTARTSYS;
612
613         err = 0;
614         if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
615                 goto out_unlock;
616
617         pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
618         kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
619         if (IS_ERR(kaddr)) {
620                 err = PTR_ERR(kaddr);
621                 goto out_unlock;
622         }
623
624         h = kaddr + erofs_blkoff(sb, pos);
625         /*
626          * If the highest bit of the 8-byte map header is set, the whole file
627          * is stored in the packed inode. The remaining bits keep z_fragmentoff.
628          */
629         if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
630                 vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
631                 vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
632                 vi->z_tailextent_headlcn = 0;
633                 goto done;
634         }
635         vi->z_advise = le16_to_cpu(h->h_advise);
636         vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
637         vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
638
639         headnr = 0;
640         if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
641             vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
642                 erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
643                           headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
644                 err = -EOPNOTSUPP;
645                 goto out_put_metabuf;
646         }
647
648         vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7);
649         if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
650             vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
651                             Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
652                 erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
653                           vi->nid);
654                 err = -EFSCORRUPTED;
655                 goto out_put_metabuf;
656         }
657         if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
658             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
659             !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
660                 erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
661                           vi->nid);
662                 err = -EFSCORRUPTED;
663                 goto out_put_metabuf;
664         }
665
666         if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
667                 struct erofs_map_blocks map = {
668                         .buf = __EROFS_BUF_INITIALIZER
669                 };
670
671                 vi->z_idata_size = le16_to_cpu(h->h_idata_size);
672                 err = z_erofs_do_map_blocks(inode, &map,
673                                             EROFS_GET_BLOCKS_FINDTAIL);
674                 erofs_put_metabuf(&map.buf);
675
676                 if (!map.m_plen ||
677                     erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) {
678                         erofs_err(sb, "invalid tail-packing pclustersize %llu",
679                                   map.m_plen);
680                         err = -EFSCORRUPTED;
681                 }
682                 if (err < 0)
683                         goto out_put_metabuf;
684         }
685
686         if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
687             !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
688                 struct erofs_map_blocks map = {
689                         .buf = __EROFS_BUF_INITIALIZER
690                 };
691
692                 vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
693                 err = z_erofs_do_map_blocks(inode, &map,
694                                             EROFS_GET_BLOCKS_FINDTAIL);
695                 erofs_put_metabuf(&map.buf);
696                 if (err < 0)
697                         goto out_put_metabuf;
698         }
699 done:
700         /* paired with smp_mb() at the beginning of the function */
701         smp_mb();
702         set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
703 out_put_metabuf:
704         erofs_put_metabuf(&buf);
705 out_unlock:
706         clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
707         return err;
708 }
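/*
 * Summary of the struct z_erofs_map_header fields consumed above, as
 * inferred from this function (see erofs_fs.h for the authoritative
 * layout):
 *
 *	h_advise	 (le16)	Z_EROFS_ADVISE_* bits -> vi->z_advise
 *	h_algorithmtype	 (u8)	low/high nibble: HEAD1/HEAD2 algorithm
 *	h_clusterbits	 (u8)	low 3 bits: lclusterbits - s_blocksize_bits,
 *				plus the Z_EROFS_FRAGMENT_INODE_BIT flag
 *	h_idata_size	 (le16)	inline (tail-packing) pcluster size
 *	h_fragmentoff	 (le32)	fragment offset when the flag above is clear
 *
 * When the fragment-inode bit is set, the whole 8-byte header (minus that
 * bit) is instead reinterpreted as a 64-bit z_fragmentoff.
 */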
709
710 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
711                             int flags)
712 {
713         struct erofs_inode *const vi = EROFS_I(inode);
714         int err = 0;
715
716         trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
717
718         /* when trying to read beyond EOF, leave it unmapped */
719         if (map->m_la >= inode->i_size) {
720                 map->m_llen = map->m_la + 1 - inode->i_size;
721                 map->m_la = inode->i_size;
722                 map->m_flags = 0;
723                 goto out;
724         }
725
726         err = z_erofs_fill_inode_lazy(inode);
727         if (err)
728                 goto out;
729
730         if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
731             !vi->z_tailextent_headlcn) {
732                 map->m_la = 0;
733                 map->m_llen = inode->i_size;
734                 map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
735                                 EROFS_MAP_FRAGMENT;
736                 goto out;
737         }
738
739         err = z_erofs_do_map_blocks(inode, map, flags);
740 out:
741         trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
742         return err;
743 }
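/*
 * Minimal caller sketch, mirroring z_erofs_iomap_begin_report() below
 * (handle_extent() is purely hypothetical): set m_la to the wanted file
 * offset, run the iterator, consume m_la/m_llen/m_pa/m_flags and always
 * drop the metabuf afterwards.
 *
 *	struct erofs_map_blocks map = { .m_la = offset };
 *	int err = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
 *
 *	erofs_put_metabuf(&map.buf);
 *	if (!err && (map.m_flags & EROFS_MAP_MAPPED))
 *		handle_extent(map.m_la, map.m_llen, map.m_pa);
 */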
744
745 static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
746                                 loff_t length, unsigned int flags,
747                                 struct iomap *iomap, struct iomap *srcmap)
748 {
749         int ret;
750         struct erofs_map_blocks map = { .m_la = offset };
751
752         ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
753         erofs_put_metabuf(&map.buf);
754         if (ret < 0)
755                 return ret;
756
757         iomap->bdev = inode->i_sb->s_bdev;
758         iomap->offset = map.m_la;
759         iomap->length = map.m_llen;
760         if (map.m_flags & EROFS_MAP_MAPPED) {
761                 iomap->type = IOMAP_MAPPED;
762                 iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
763                               IOMAP_NULL_ADDR : map.m_pa;
764         } else {
765                 iomap->type = IOMAP_HOLE;
766                 iomap->addr = IOMAP_NULL_ADDR;
767                 /*
768                  * There is no strict rule on how to describe extents past
769                  * EOF, but we need to handle it as below; otherwise, iomap
770                  * itself will get into an endless loop past EOF.
771                  *
772                  * Calculate the delta between the requested offset and the
773                  * extent start (map.m_la), then add it to the length.
774                  * (NB: offset >= map.m_la always holds)
775                  */
776                 if (iomap->offset >= inode->i_size)
777                         iomap->length = length + offset - map.m_la;
778         }
779         iomap->flags = 0;
780         return 0;
781 }
782
783 const struct iomap_ops z_erofs_iomap_report_ops = {
784         .iomap_begin = z_erofs_iomap_begin_report,
785 };