/* fs/ntfs3: Do not use driver's own alloc wrappers */
/* Source: linux-2.6-block.git, fs/ntfs3/bitmap.c */
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *
4  * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5  *
6  * This code builds two trees of free clusters extents.
7  * Trees are sorted by start of extent and by length of extent.
8  * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
9  * In extreme case code reads on-disk bitmap to find free clusters
10  *
11  */
12
13 #include <linux/blkdev.h>
14 #include <linux/buffer_head.h>
15 #include <linux/fs.h>
16 #include <linux/nls.h>
17
18 #include "debug.h"
19 #include "ntfs.h"
20 #include "ntfs_fs.h"
21
22 /*
23  * Maximum number of extents in tree.
24  */
25 #define NTFS_MAX_WND_EXTENTS (32u * 1024u)
26
/*
 * Key-carrying rb-tree node: pairs the rb_node with the size_t key it is
 * sorted by (either an extent's start bit or its length in bits).
 */
struct rb_node_key {
        struct rb_node node;
        size_t key;
};
31
/*
 * Free-space extent descriptor. Every extent is linked into two trees at
 * once: wnd->start_tree, sorted by start bit, and wnd->count_tree, sorted
 * by length (descending — see rb_insert_count()).
 */
struct e_node {
        struct rb_node_key start; /* Tree sorted by start bit */
        struct rb_node_key count; /* Tree sorted by extent length */
};
39
40 static int wnd_rescan(struct wnd_bitmap *wnd);
41 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
42 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
43
44 static struct kmem_cache *ntfs_enode_cachep;
45
46 int __init ntfs3_init_bitmap(void)
47 {
48         ntfs_enode_cachep =
49                 kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
50                                   SLAB_RECLAIM_ACCOUNT, NULL);
51         return ntfs_enode_cachep ? 0 : -ENOMEM;
52 }
53
/*
 * ntfs3_exit_bitmap - destroy the e_node slab cache created by
 * ntfs3_init_bitmap(). Called at module unload; kmem_cache_destroy()
 * accepts NULL, so this is safe even if init failed.
 */
void ntfs3_exit_bitmap(void)
{
        kmem_cache_destroy(ntfs_enode_cachep);
}
58
59 static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
60 {
61         return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
62 }
63
/*
 * wnd_scan
 *
 * Scan bitmap window 'buf' over the bit range [wpos, wend) looking for a
 * run of 'to_alloc' consecutive zero (free) bits.
 *
 * buf         - one bitmap window (one fs block worth of bits)
 * wbit        - absolute bit number of the first bit of this window
 * wpos, wend  - scan range inside the window
 * to_alloc    - required run length, in bits
 * prev_tail   - in/out: length of the free run that ended exactly at the
 *               end of the previous window; lets runs span window borders
 * b_pos,b_len - in/out: position/length of the biggest fragment seen so far
 *
 * Returns the absolute start bit of a run of at least 'to_alloc' free
 * bits, or -1 (as size_t) if no such run lies in this window.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
                       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
                       size_t *b_len)
{
        while (wpos < wend) {
                size_t free_len;
                u32 free_bits, end;
                u32 used = find_next_zero_bit(buf, wend, wpos);

                if (used >= wend) {
                        /* No zero bits left in range; the carried-over tail
                         * may still be the biggest fragment. */
                        if (*b_len < *prev_tail) {
                                *b_pos = wbit - *prev_tail;
                                *b_len = *prev_tail;
                        }

                        *prev_tail = 0;
                        return -1;
                }

                if (used > wpos) {
                        /* A used gap breaks the run carried in prev_tail. */
                        wpos = used;
                        if (*b_len < *prev_tail) {
                                *b_pos = wbit - *prev_tail;
                                *b_len = *prev_tail;
                        }

                        *prev_tail = 0;
                }

                /*
                 * Now we have a fragment [wpos, wend) starting with 0.
                 * Only 'to_alloc - *prev_tail' more bits are needed.
                 */
                end = wpos + to_alloc - *prev_tail;
                free_bits = find_next_bit(buf, min(end, wend), wpos);

                free_len = *prev_tail + free_bits - wpos;

                if (*b_len < free_len) {
                        *b_pos = wbit + wpos - *prev_tail;
                        *b_len = free_len;
                }

                if (free_len >= to_alloc)
                        return wbit + wpos - *prev_tail;

                if (free_bits >= wend) {
                        /* Free run reaches the end of the window: carry it
                         * into the next window via prev_tail. */
                        *prev_tail += free_bits - wpos;
                        return -1;
                }

                wpos = free_bits + 1;

                *prev_tail = 0;
        }

        return -1;
}
126
127 /*
128  * wnd_close
129  *
130  * Frees all resources
131  */
132 void wnd_close(struct wnd_bitmap *wnd)
133 {
134         struct rb_node *node, *next;
135
136         kfree(wnd->free_bits);
137         run_close(&wnd->run);
138
139         node = rb_first(&wnd->start_tree);
140
141         while (node) {
142                 next = rb_next(node);
143                 rb_erase(node, &wnd->start_tree);
144                 kmem_cache_free(ntfs_enode_cachep,
145                                 rb_entry(node, struct e_node, start.node));
146                 node = next;
147         }
148 }
149
150 static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
151 {
152         struct rb_node **p = &root->rb_node;
153         struct rb_node *r = NULL;
154
155         while (*p) {
156                 struct rb_node_key *k;
157
158                 k = rb_entry(*p, struct rb_node_key, node);
159                 if (v < k->key) {
160                         p = &(*p)->rb_left;
161                 } else if (v > k->key) {
162                         r = &k->node;
163                         p = &(*p)->rb_right;
164                 } else {
165                         return &k->node;
166                 }
167         }
168
169         return r;
170 }
171
172 /*
173  * rb_insert_count
174  *
175  * Helper function to insert special kind of 'count' tree
176  */
177 static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
178 {
179         struct rb_node **p = &root->rb_node;
180         struct rb_node *parent = NULL;
181         size_t e_ckey = e->count.key;
182         size_t e_skey = e->start.key;
183
184         while (*p) {
185                 struct e_node *k =
186                         rb_entry(parent = *p, struct e_node, count.node);
187
188                 if (e_ckey > k->count.key) {
189                         p = &(*p)->rb_left;
190                 } else if (e_ckey < k->count.key) {
191                         p = &(*p)->rb_right;
192                 } else if (e_skey < k->start.key) {
193                         p = &(*p)->rb_left;
194                 } else if (e_skey > k->start.key) {
195                         p = &(*p)->rb_right;
196                 } else {
197                         WARN_ON(1);
198                         return false;
199                 }
200         }
201
202         rb_link_node(&e->count.node, parent, p);
203         rb_insert_color(&e->count.node, root);
204         return true;
205 }
206
207 /*
208  * inline bool rb_insert_start
209  *
210  * Helper function to insert special kind of 'start' tree
211  */
212 static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
213 {
214         struct rb_node **p = &root->rb_node;
215         struct rb_node *parent = NULL;
216         size_t e_skey = e->start.key;
217
218         while (*p) {
219                 struct e_node *k;
220
221                 parent = *p;
222
223                 k = rb_entry(parent, struct e_node, start.node);
224                 if (e_skey < k->start.key) {
225                         p = &(*p)->rb_left;
226                 } else if (e_skey > k->start.key) {
227                         p = &(*p)->rb_right;
228                 } else {
229                         WARN_ON(1);
230                         return false;
231                 }
232         }
233
234         rb_link_node(&e->start.node, parent, p);
235         rb_insert_color(&e->start.node, root);
236         return true;
237 }
238
/*
 * wnd_add_free_ext
 *
 * Adds (or merges) the free extent [bit, bit+len) into the two cached
 * rb-trees (start_tree / count_tree).
 *
 * build == true while wnd_rescan() populates the trees in bitmap order;
 * in that mode no merge with existing nodes is attempted and extent_min
 * is used to filter out extents too small to keep once the tree is full.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
                             bool build)
{
        struct e_node *e, *e0 = NULL;
        size_t ib, end_in = bit + len;
        struct rb_node *n;

        if (build) {
                /* Use extent_min to filter too short extents */
                if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
                    len <= wnd->extent_min) {
                        /* Dropped an extent: tree no longer complete */
                        wnd->uptodated = -1;
                        return;
                }
        } else {
                /* Try to find extent before 'bit' */
                n = rb_lookup(&wnd->start_tree, bit);

                if (!n) {
                        n = rb_first(&wnd->start_tree);
                } else {
                        e = rb_entry(n, struct e_node, start.node);
                        n = rb_next(n);
                        if (e->start.key + e->count.key == bit) {
                                /* Remove left: adjacent extent, absorb it */
                                bit = e->start.key;
                                len += e->count.key;
                                rb_erase(&e->start.node, &wnd->start_tree);
                                rb_erase(&e->count.node, &wnd->count_tree);
                                wnd->count -= 1;
                                e0 = e; /* keep node for reuse below */
                        }
                }

                /* Absorb following extents that touch/overlap [bit, end_in) */
                while (n) {
                        size_t next_end;

                        e = rb_entry(n, struct e_node, start.node);
                        next_end = e->start.key + e->count.key;
                        if (e->start.key > end_in)
                                break;

                        /* Remove right */
                        n = rb_next(n);
                        /* NOTE(review): assumes next_end >= end_in, i.e. no
                         * cached free extent lies strictly inside the newly
                         * freed range (those bits were in use) — confirm. */
                        len += next_end - end_in;
                        end_in = next_end;
                        rb_erase(&e->start.node, &wnd->start_tree);
                        rb_erase(&e->count.node, &wnd->count_tree);
                        wnd->count -= 1;

                        if (!e0)
                                e0 = e;
                        else
                                kmem_cache_free(ntfs_enode_cachep, e);
                }

                if (wnd->uptodated != 1) {
                        /* Trees may be incomplete: try to grow the extent by
                         * probing the on-disk bitmap, bounded by the reserved
                         * zone [zone_bit, zone_end). */
                        /* Check bits before 'bit' */
                        ib = wnd->zone_bit == wnd->zone_end ||
                                             bit < wnd->zone_end
                                     ? 0
                                     : wnd->zone_end;

                        while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
                                bit -= 1;
                                len += 1;
                        }

                        /* Check bits after 'end_in' */
                        ib = wnd->zone_bit == wnd->zone_end ||
                                             end_in > wnd->zone_bit
                                     ? wnd->nbits
                                     : wnd->zone_bit;

                        while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
                                end_in += 1;
                                len += 1;
                        }
                }
        }
        /* Insert new fragment */
        if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
                if (e0)
                        kmem_cache_free(ntfs_enode_cachep, e0);

                wnd->uptodated = -1;

                /* Compare with smallest fragment (rightmost in count_tree) */
                n = rb_last(&wnd->count_tree);
                e = rb_entry(n, struct e_node, count.node);
                if (len <= e->count.key)
                        goto out; /* Do not insert small fragments */

                if (build) {
                        struct e_node *e2;

                        n = rb_prev(n);
                        e2 = rb_entry(n, struct e_node, count.node);
                        /* smallest fragment will be 'e2->count.key' */
                        wnd->extent_min = e2->count.key;
                }

                /* Replace smallest fragment by new one */
                rb_erase(&e->start.node, &wnd->start_tree);
                rb_erase(&e->count.node, &wnd->count_tree);
                wnd->count -= 1;
        } else {
                /* Reuse an absorbed node when available; GFP_ATOMIC keeps
                 * the allocation non-blocking. */
                e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
                if (!e) {
                        wnd->uptodated = -1;
                        goto out;
                }

                if (build && len <= wnd->extent_min)
                        wnd->extent_min = len;
        }
        e->start.key = bit;
        e->count.key = len;
        if (len > wnd->extent_max)
                wnd->extent_max = len;

        rb_insert_start(&wnd->start_tree, e);
        rb_insert_count(&wnd->count_tree, e);
        wnd->count += 1;

out:;
}
372
/*
 * wnd_remove_free_ext
 *
 * Removes the range [bit, bit+len) from the cached free-space trees,
 * trimming, splitting or deleting extents that overlap it, and keeping
 * wnd->extent_max consistent.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
        struct rb_node *n, *n3;
        struct e_node *e, *e3;
        size_t end_in = bit + len;
        size_t end3, end, new_key, new_len, max_new_len;

        /* Try to find extent before 'bit' */
        n = rb_lookup(&wnd->start_tree, bit);

        if (!n)
                return;

        e = rb_entry(n, struct e_node, start.node);
        end = e->start.key + e->count.key;

        /* new_key/new_len: tail extent left after the removed range */
        new_key = new_len = 0;
        len = e->count.key;

        /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n' */
        if (e->start.key > bit)
                ;
        else if (end_in <= end) {
                /* Range [bit,end_in) inside 'e': split into head and tail */
                new_key = end_in;
                new_len = end - end_in;
                len = bit - e->start.key; /* remaining head length */
        } else if (bit > end) {
                /* Range lies past 'e': walk the following extents */
                bool bmax = false;

                n3 = rb_next(n);

                while (n3) {
                        e3 = rb_entry(n3, struct e_node, start.node);
                        if (e3->start.key >= end_in)
                                break;

                        /* Removing the current maximum: recompute below */
                        if (e3->count.key == wnd->extent_max)
                                bmax = true;

                        end3 = e3->start.key + e3->count.key;
                        if (end3 > end_in) {
                                /* Partial overlap: keep only the tail of e3 */
                                e3->start.key = end_in;
                                rb_erase(&e3->count.node, &wnd->count_tree);
                                e3->count.key = end3 - end_in;
                                rb_insert_count(&wnd->count_tree, e3);
                                break;
                        }

                        /* e3 fully covered by the removed range: drop it */
                        n3 = rb_next(n3);
                        rb_erase(&e3->start.node, &wnd->start_tree);
                        rb_erase(&e3->count.node, &wnd->count_tree);
                        wnd->count -= 1;
                        kmem_cache_free(ntfs_enode_cachep, e3);
                }
                if (!bmax)
                        return;
                /* count_tree is sorted descending: first node is largest */
                n3 = rb_first(&wnd->count_tree);
                wnd->extent_max =
                        n3 ? rb_entry(n3, struct e_node, count.node)->count.key
                           : 0;
                return;
        }

        /* Recompute extent_max if 'e' was the unique largest extent */
        if (e->count.key != wnd->extent_max) {
                ;
        } else if (rb_prev(&e->count.node)) {
                /* Another extent with count >= e's exists (descending tree) */
                ;
        } else {
                n3 = rb_next(&e->count.node);
                max_new_len = len > new_len ? len : new_len;
                if (!n3) {
                        wnd->extent_max = max_new_len;
                } else {
                        e3 = rb_entry(n3, struct e_node, count.node);
                        wnd->extent_max = max(e3->count.key, max_new_len);
                }
        }

        if (!len) {
                /* No head part remains */
                if (new_len) {
                        /* Reuse 'e' for the tail part */
                        e->start.key = new_key;
                        rb_erase(&e->count.node, &wnd->count_tree);
                        e->count.key = new_len;
                        rb_insert_count(&wnd->count_tree, e);
                } else {
                        rb_erase(&e->start.node, &wnd->start_tree);
                        rb_erase(&e->count.node, &wnd->count_tree);
                        wnd->count -= 1;
                        kmem_cache_free(ntfs_enode_cachep, e);
                }
                goto out;
        }
        /* Shrink 'e' to the head part */
        rb_erase(&e->count.node, &wnd->count_tree);
        e->count.key = len;
        rb_insert_count(&wnd->count_tree, e);

        if (!new_len)
                goto out;

        /* Insert the tail part as a new extent */
        if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
                wnd->uptodated = -1;

                /* Get minimal extent */
                e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
                             count.node);
                if (e->count.key > new_len)
                        goto out;

                /* Replace minimum */
                rb_erase(&e->start.node, &wnd->start_tree);
                rb_erase(&e->count.node, &wnd->count_tree);
                wnd->count -= 1;
        } else {
                e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
                if (!e)
                        wnd->uptodated = -1;
        }

        if (e) {
                e->start.key = new_key;
                e->count.key = new_len;
                rb_insert_start(&wnd->start_tree, e);
                rb_insert_count(&wnd->count_tree, e);
                wnd->count += 1;
        }

out:
        /* If the cache became empty and may be incomplete, rebuild it */
        if (!wnd->count && 1 != wnd->uptodated)
                wnd_rescan(wnd);
}
509
/*
 * wnd_rescan
 *
 * Scans the whole on-disk bitmap, rebuilding the free-extent trees, the
 * per-window free counters and total_zeroes. Used while initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
        int err = 0;
        size_t prev_tail = 0; /* free run carried over from previous window */
        struct super_block *sb = wnd->sb;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        u64 lbo, len = 0;
        u32 blocksize = sb->s_blocksize;
        u8 cluster_bits = sbi->cluster_bits;
        u32 wbits = 8 * sb->s_blocksize;
        u32 used, frb;
        const ulong *buf;
        size_t wpos, wbit, iw, vbo;
        struct buffer_head *bh = NULL;
        CLST lcn, clen;

        wnd->uptodated = 0;
        wnd->extent_max = 0;
        wnd->extent_min = MINUS_ONE_T;
        wnd->total_zeroes = 0;

        /* vbo: byte offset of the current window inside the bitmap */
        vbo = 0;

        for (iw = 0; iw < wnd->nwnd; iw++) {
                if (iw + 1 == wnd->nwnd)
                        wbits = wnd->bits_last;

                if (wnd->inited) {
                        /* Use cached counters to avoid re-reading the block */
                        if (!wnd->free_bits[iw]) {
                                /* all ones */
                                if (prev_tail) {
                                        wnd_add_free_ext(wnd,
                                                         vbo * 8 - prev_tail,
                                                         prev_tail, true);
                                        prev_tail = 0;
                                }
                                goto next_wnd;
                        }
                        if (wbits == wnd->free_bits[iw]) {
                                /* all zeroes */
                                prev_tail += wbits;
                                wnd->total_zeroes += wbits;
                                goto next_wnd;
                        }
                }

                if (!len) {
                        /* Map the next part of the bitmap to disk blocks */
                        u32 off = vbo & sbi->cluster_mask;

                        if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
                                              &lcn, &clen, NULL)) {
                                err = -ENOENT;
                                goto out;
                        }

                        lbo = ((u64)lcn << cluster_bits) + off;
                        len = ((u64)clen << cluster_bits) - off;
                }

                bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
                if (!bh) {
                        err = -EIO;
                        goto out;
                }

                buf = (ulong *)bh->b_data;

                used = __bitmap_weight(buf, wbits);
                if (used < wbits) {
                        frb = wbits - used;
                        wnd->free_bits[iw] = frb;
                        wnd->total_zeroes += frb;
                }

                wpos = 0;
                wbit = vbo * 8; /* absolute bit number of window start */

                /* Clamp the last window to the real bitmap size */
                if (wbit + wbits > wnd->nbits)
                        wbits = wnd->nbits - wbit;

                /* Walk alternating used/free runs inside this window */
                do {
                        used = find_next_zero_bit(buf, wbits, wpos);

                        if (used > wpos && prev_tail) {
                                /* A used bit ends the carried-over run */
                                wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
                                                 prev_tail, true);
                                prev_tail = 0;
                        }

                        wpos = used;

                        if (wpos >= wbits) {
                                /* No free blocks */
                                prev_tail = 0;
                                break;
                        }

                        frb = find_next_bit(buf, wbits, wpos);
                        if (frb >= wbits) {
                                /* keep last free block */
                                prev_tail += frb - wpos;
                                break;
                        }

                        wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
                                         frb + prev_tail - wpos, true);

                        /* Skip free block and first '1' */
                        wpos = frb + 1;
                        /* Reset previous tail */
                        prev_tail = 0;
                } while (wpos < wbits);

next_wnd:

                if (bh)
                        put_bh(bh);
                bh = NULL;

                vbo += blocksize;
                if (len) {
                        len -= blocksize;
                        lbo += blocksize;
                }
        }

        /* Add last block */
        if (prev_tail)
                wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

        /*
         * Before init cycle wnd->uptodated was 0
         * If any errors or limits occurs while initialization then
         * wnd->uptodated will be -1
         * If 'uptodated' is still 0 then Tree is really updated
         */
        if (!wnd->uptodated)
                wnd->uptodated = 1;

        if (wnd->zone_bit != wnd->zone_end) {
                /* Re-apply the reserved zone on top of the rebuilt trees */
                size_t zlen = wnd->zone_end - wnd->zone_bit;

                wnd->zone_end = wnd->zone_bit;
                wnd_zone_set(wnd, wnd->zone_bit, zlen);
        }

out:
        return err;
}
664
665 /*
666  * wnd_init
667  */
668 int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
669 {
670         int err;
671         u32 blocksize = sb->s_blocksize;
672         u32 wbits = blocksize * 8;
673
674         init_rwsem(&wnd->rw_lock);
675
676         wnd->sb = sb;
677         wnd->nbits = nbits;
678         wnd->total_zeroes = nbits;
679         wnd->extent_max = MINUS_ONE_T;
680         wnd->zone_bit = wnd->zone_end = 0;
681         wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
682         wnd->bits_last = nbits & (wbits - 1);
683         if (!wnd->bits_last)
684                 wnd->bits_last = wbits;
685
686         wnd->free_bits = kzalloc(wnd->nwnd * sizeof(u16), GFP_NOFS);
687         if (!wnd->free_bits)
688                 return -ENOMEM;
689
690         err = wnd_rescan(wnd);
691         if (err)
692                 return err;
693
694         wnd->inited = true;
695
696         return 0;
697 }
698
699 /*
700  * wnd_map
701  *
702  * call sb_bread for requested window
703  */
704 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
705 {
706         size_t vbo;
707         CLST lcn, clen;
708         struct super_block *sb = wnd->sb;
709         struct ntfs_sb_info *sbi;
710         struct buffer_head *bh;
711         u64 lbo;
712
713         sbi = sb->s_fs_info;
714         vbo = (u64)iw << sb->s_blocksize_bits;
715
716         if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
717                               NULL)) {
718                 return ERR_PTR(-ENOENT);
719         }
720
721         lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
722
723         bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
724         if (!bh)
725                 return ERR_PTR(-EIO);
726
727         return bh;
728 }
729
730 /*
731  * wnd_set_free
732  *
733  * Marks the bits range from bit to bit + bits as free
734  */
735 int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
736 {
737         int err = 0;
738         struct super_block *sb = wnd->sb;
739         size_t bits0 = bits;
740         u32 wbits = 8 * sb->s_blocksize;
741         size_t iw = bit >> (sb->s_blocksize_bits + 3);
742         u32 wbit = bit & (wbits - 1);
743         struct buffer_head *bh;
744
745         while (iw < wnd->nwnd && bits) {
746                 u32 tail, op;
747                 ulong *buf;
748
749                 if (iw + 1 == wnd->nwnd)
750                         wbits = wnd->bits_last;
751
752                 tail = wbits - wbit;
753                 op = tail < bits ? tail : bits;
754
755                 bh = wnd_map(wnd, iw);
756                 if (IS_ERR(bh)) {
757                         err = PTR_ERR(bh);
758                         break;
759                 }
760
761                 buf = (ulong *)bh->b_data;
762
763                 lock_buffer(bh);
764
765                 __bitmap_clear(buf, wbit, op);
766
767                 wnd->free_bits[iw] += op;
768
769                 set_buffer_uptodate(bh);
770                 mark_buffer_dirty(bh);
771                 unlock_buffer(bh);
772                 put_bh(bh);
773
774                 wnd->total_zeroes += op;
775                 bits -= op;
776                 wbit = 0;
777                 iw += 1;
778         }
779
780         wnd_add_free_ext(wnd, bit, bits0, false);
781
782         return err;
783 }
784
785 /*
786  * wnd_set_used
787  *
788  * Marks the bits range from bit to bit + bits as used
789  */
790 int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
791 {
792         int err = 0;
793         struct super_block *sb = wnd->sb;
794         size_t bits0 = bits;
795         size_t iw = bit >> (sb->s_blocksize_bits + 3);
796         u32 wbits = 8 * sb->s_blocksize;
797         u32 wbit = bit & (wbits - 1);
798         struct buffer_head *bh;
799
800         while (iw < wnd->nwnd && bits) {
801                 u32 tail, op;
802                 ulong *buf;
803
804                 if (unlikely(iw + 1 == wnd->nwnd))
805                         wbits = wnd->bits_last;
806
807                 tail = wbits - wbit;
808                 op = tail < bits ? tail : bits;
809
810                 bh = wnd_map(wnd, iw);
811                 if (IS_ERR(bh)) {
812                         err = PTR_ERR(bh);
813                         break;
814                 }
815                 buf = (ulong *)bh->b_data;
816
817                 lock_buffer(bh);
818
819                 __bitmap_set(buf, wbit, op);
820                 wnd->free_bits[iw] -= op;
821
822                 set_buffer_uptodate(bh);
823                 mark_buffer_dirty(bh);
824                 unlock_buffer(bh);
825                 put_bh(bh);
826
827                 wnd->total_zeroes -= op;
828                 bits -= op;
829                 wbit = 0;
830                 iw += 1;
831         }
832
833         if (!RB_EMPTY_ROOT(&wnd->start_tree))
834                 wnd_remove_free_ext(wnd, bit, bits0);
835
836         return err;
837 }
838
839 /*
840  * wnd_is_free_hlp
841  *
842  * Returns true if all clusters [bit, bit+bits) are free (bitmap only)
843  */
844 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
845 {
846         struct super_block *sb = wnd->sb;
847         size_t iw = bit >> (sb->s_blocksize_bits + 3);
848         u32 wbits = 8 * sb->s_blocksize;
849         u32 wbit = bit & (wbits - 1);
850
851         while (iw < wnd->nwnd && bits) {
852                 u32 tail, op;
853
854                 if (unlikely(iw + 1 == wnd->nwnd))
855                         wbits = wnd->bits_last;
856
857                 tail = wbits - wbit;
858                 op = tail < bits ? tail : bits;
859
860                 if (wbits != wnd->free_bits[iw]) {
861                         bool ret;
862                         struct buffer_head *bh = wnd_map(wnd, iw);
863
864                         if (IS_ERR(bh))
865                                 return false;
866
867                         ret = are_bits_clear((ulong *)bh->b_data, wbit, op);
868
869                         put_bh(bh);
870                         if (!ret)
871                                 return false;
872                 }
873
874                 bits -= op;
875                 wbit = 0;
876                 iw += 1;
877         }
878
879         return true;
880 }
881
882 /*
883  * wnd_is_free
884  *
885  * Returns true if all clusters [bit, bit+bits) are free
886  */
887 bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
888 {
889         bool ret;
890         struct rb_node *n;
891         size_t end;
892         struct e_node *e;
893
894         if (RB_EMPTY_ROOT(&wnd->start_tree))
895                 goto use_wnd;
896
897         n = rb_lookup(&wnd->start_tree, bit);
898         if (!n)
899                 goto use_wnd;
900
901         e = rb_entry(n, struct e_node, start.node);
902
903         end = e->start.key + e->count.key;
904
905         if (bit < end && bit + bits <= end)
906                 return true;
907
908 use_wnd:
909         ret = wnd_is_free_hlp(wnd, bit, bits);
910
911         return ret;
912 }
913
914 /*
915  * wnd_is_used
916  *
917  * Returns true if all clusters [bit, bit+bits) are used
918  */
919 bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
920 {
921         bool ret = false;
922         struct super_block *sb = wnd->sb;
923         size_t iw = bit >> (sb->s_blocksize_bits + 3);
924         u32 wbits = 8 * sb->s_blocksize;
925         u32 wbit = bit & (wbits - 1);
926         size_t end;
927         struct rb_node *n;
928         struct e_node *e;
929
930         if (RB_EMPTY_ROOT(&wnd->start_tree))
931                 goto use_wnd;
932
933         end = bit + bits;
934         n = rb_lookup(&wnd->start_tree, end - 1);
935         if (!n)
936                 goto use_wnd;
937
938         e = rb_entry(n, struct e_node, start.node);
939         if (e->start.key + e->count.key > bit)
940                 return false;
941
942 use_wnd:
943         while (iw < wnd->nwnd && bits) {
944                 u32 tail, op;
945
946                 if (unlikely(iw + 1 == wnd->nwnd))
947                         wbits = wnd->bits_last;
948
949                 tail = wbits - wbit;
950                 op = tail < bits ? tail : bits;
951
952                 if (wnd->free_bits[iw]) {
953                         bool ret;
954                         struct buffer_head *bh = wnd_map(wnd, iw);
955
956                         if (IS_ERR(bh))
957                                 goto out;
958
959                         ret = are_bits_set((ulong *)bh->b_data, wbit, op);
960                         put_bh(bh);
961                         if (!ret)
962                                 goto out;
963                 }
964
965                 bits -= op;
966                 wbit = 0;
967                 iw += 1;
968         }
969         ret = true;
970
971 out:
972         return ret;
973 }
974
975 /*
976  * wnd_find
977  * - flags - BITMAP_FIND_XXX flags
978  *
979  * looks for free space
980  * Returns 0 if not found
981  */
982 size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
983                 size_t flags, size_t *allocated)
984 {
985         struct super_block *sb;
986         u32 wbits, wpos, wzbit, wzend;
987         size_t fnd, max_alloc, b_len, b_pos;
988         size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
989         size_t to_alloc0 = to_alloc;
990         const ulong *buf;
991         const struct e_node *e;
992         const struct rb_node *pr, *cr;
993         u8 log2_bits;
994         bool fbits_valid;
995         struct buffer_head *bh;
996
997         /* fast checking for available free space */
998         if (flags & BITMAP_FIND_FULL) {
999                 size_t zeroes = wnd_zeroes(wnd);
1000
1001                 zeroes -= wnd->zone_end - wnd->zone_bit;
1002                 if (zeroes < to_alloc0)
1003                         goto no_space;
1004
1005                 if (to_alloc0 > wnd->extent_max)
1006                         goto no_space;
1007         } else {
1008                 if (to_alloc > wnd->extent_max)
1009                         to_alloc = wnd->extent_max;
1010         }
1011
1012         if (wnd->zone_bit <= hint && hint < wnd->zone_end)
1013                 hint = wnd->zone_end;
1014
1015         max_alloc = wnd->nbits;
1016         b_len = b_pos = 0;
1017
1018         if (hint >= max_alloc)
1019                 hint = 0;
1020
1021         if (RB_EMPTY_ROOT(&wnd->start_tree)) {
1022                 if (wnd->uptodated == 1) {
1023                         /* extents tree is updated -> no free space */
1024                         goto no_space;
1025                 }
1026                 goto scan_bitmap;
1027         }
1028
1029         e = NULL;
1030         if (!hint)
1031                 goto allocate_biggest;
1032
1033         /* Use hint: enumerate extents by start >= hint */
1034         pr = NULL;
1035         cr = wnd->start_tree.rb_node;
1036
1037         for (;;) {
1038                 e = rb_entry(cr, struct e_node, start.node);
1039
1040                 if (e->start.key == hint)
1041                         break;
1042
1043                 if (e->start.key < hint) {
1044                         pr = cr;
1045                         cr = cr->rb_right;
1046                         if (!cr)
1047                                 break;
1048                         continue;
1049                 }
1050
1051                 cr = cr->rb_left;
1052                 if (!cr) {
1053                         e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
1054                         break;
1055                 }
1056         }
1057
1058         if (!e)
1059                 goto allocate_biggest;
1060
1061         if (e->start.key + e->count.key > hint) {
1062                 /* We have found extension with 'hint' inside */
1063                 size_t len = e->start.key + e->count.key - hint;
1064
1065                 if (len >= to_alloc && hint + to_alloc <= max_alloc) {
1066                         fnd = hint;
1067                         goto found;
1068                 }
1069
1070                 if (!(flags & BITMAP_FIND_FULL)) {
1071                         if (len > to_alloc)
1072                                 len = to_alloc;
1073
1074                         if (hint + len <= max_alloc) {
1075                                 fnd = hint;
1076                                 to_alloc = len;
1077                                 goto found;
1078                         }
1079                 }
1080         }
1081
1082 allocate_biggest:
1083         /* Allocate from biggest free extent */
1084         e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
1085         if (e->count.key != wnd->extent_max)
1086                 wnd->extent_max = e->count.key;
1087
1088         if (e->count.key < max_alloc) {
1089                 if (e->count.key >= to_alloc) {
1090                         ;
1091                 } else if (flags & BITMAP_FIND_FULL) {
1092                         if (e->count.key < to_alloc0) {
1093                                 /* Biggest free block is less then requested */
1094                                 goto no_space;
1095                         }
1096                         to_alloc = e->count.key;
1097                 } else if (-1 != wnd->uptodated) {
1098                         to_alloc = e->count.key;
1099                 } else {
1100                         /* Check if we can use more bits */
1101                         size_t op, max_check;
1102                         struct rb_root start_tree;
1103
1104                         memcpy(&start_tree, &wnd->start_tree,
1105                                sizeof(struct rb_root));
1106                         memset(&wnd->start_tree, 0, sizeof(struct rb_root));
1107
1108                         max_check = e->start.key + to_alloc;
1109                         if (max_check > max_alloc)
1110                                 max_check = max_alloc;
1111                         for (op = e->start.key + e->count.key; op < max_check;
1112                              op++) {
1113                                 if (!wnd_is_free(wnd, op, 1))
1114                                         break;
1115                         }
1116                         memcpy(&wnd->start_tree, &start_tree,
1117                                sizeof(struct rb_root));
1118                         to_alloc = op - e->start.key;
1119                 }
1120
1121                 /* Prepare to return */
1122                 fnd = e->start.key;
1123                 if (e->start.key + to_alloc > max_alloc)
1124                         to_alloc = max_alloc - e->start.key;
1125                 goto found;
1126         }
1127
1128         if (wnd->uptodated == 1) {
1129                 /* extents tree is updated -> no free space */
1130                 goto no_space;
1131         }
1132
1133         b_len = e->count.key;
1134         b_pos = e->start.key;
1135
1136 scan_bitmap:
1137         sb = wnd->sb;
1138         log2_bits = sb->s_blocksize_bits + 3;
1139
1140         /* At most two ranges [hint, max_alloc) + [0, hint) */
1141 Again:
1142
1143         /* TODO: optimize request for case nbits > wbits */
1144         iw = hint >> log2_bits;
1145         wbits = sb->s_blocksize * 8;
1146         wpos = hint & (wbits - 1);
1147         prev_tail = 0;
1148         fbits_valid = true;
1149
1150         if (max_alloc == wnd->nbits) {
1151                 nwnd = wnd->nwnd;
1152         } else {
1153                 size_t t = max_alloc + wbits - 1;
1154
1155                 nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
1156         }
1157
1158         /* Enumerate all windows */
1159         for (; iw < nwnd; iw++) {
1160                 wbit = iw << log2_bits;
1161
1162                 if (!wnd->free_bits[iw]) {
1163                         if (prev_tail > b_len) {
1164                                 b_pos = wbit - prev_tail;
1165                                 b_len = prev_tail;
1166                         }
1167
1168                         /* Skip full used window */
1169                         prev_tail = 0;
1170                         wpos = 0;
1171                         continue;
1172                 }
1173
1174                 if (unlikely(iw + 1 == nwnd)) {
1175                         if (max_alloc == wnd->nbits) {
1176                                 wbits = wnd->bits_last;
1177                         } else {
1178                                 size_t t = max_alloc & (wbits - 1);
1179
1180                                 if (t) {
1181                                         wbits = t;
1182                                         fbits_valid = false;
1183                                 }
1184                         }
1185                 }
1186
1187                 if (wnd->zone_end > wnd->zone_bit) {
1188                         ebit = wbit + wbits;
1189                         zbit = max(wnd->zone_bit, wbit);
1190                         zend = min(wnd->zone_end, ebit);
1191
1192                         /* Here we have a window [wbit, ebit) and zone [zbit, zend) */
1193                         if (zend <= zbit) {
1194                                 /* Zone does not overlap window */
1195                         } else {
1196                                 wzbit = zbit - wbit;
1197                                 wzend = zend - wbit;
1198
1199                                 /* Zone overlaps window */
1200                                 if (wnd->free_bits[iw] == wzend - wzbit) {
1201                                         prev_tail = 0;
1202                                         wpos = 0;
1203                                         continue;
1204                                 }
1205
1206                                 /* Scan two ranges window: [wbit, zbit) and [zend, ebit) */
1207                                 bh = wnd_map(wnd, iw);
1208
1209                                 if (IS_ERR(bh)) {
1210                                         /* TODO: error */
1211                                         prev_tail = 0;
1212                                         wpos = 0;
1213                                         continue;
1214                                 }
1215
1216                                 buf = (ulong *)bh->b_data;
1217
1218                                 /* Scan range [wbit, zbit) */
1219                                 if (wpos < wzbit) {
1220                                         /* Scan range [wpos, zbit) */
1221                                         fnd = wnd_scan(buf, wbit, wpos, wzbit,
1222                                                        to_alloc, &prev_tail,
1223                                                        &b_pos, &b_len);
1224                                         if (fnd != MINUS_ONE_T) {
1225                                                 put_bh(bh);
1226                                                 goto found;
1227                                         }
1228                                 }
1229
1230                                 prev_tail = 0;
1231
1232                                 /* Scan range [zend, ebit) */
1233                                 if (wzend < wbits) {
1234                                         fnd = wnd_scan(buf, wbit,
1235                                                        max(wzend, wpos), wbits,
1236                                                        to_alloc, &prev_tail,
1237                                                        &b_pos, &b_len);
1238                                         if (fnd != MINUS_ONE_T) {
1239                                                 put_bh(bh);
1240                                                 goto found;
1241                                         }
1242                                 }
1243
1244                                 wpos = 0;
1245                                 put_bh(bh);
1246                                 continue;
1247                         }
1248                 }
1249
1250                 /* Current window does not overlap zone */
1251                 if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
1252                         /* window is empty */
1253                         if (prev_tail + wbits >= to_alloc) {
1254                                 fnd = wbit + wpos - prev_tail;
1255                                 goto found;
1256                         }
1257
1258                         /* Increase 'prev_tail' and process next window */
1259                         prev_tail += wbits;
1260                         wpos = 0;
1261                         continue;
1262                 }
1263
1264                 /* read window */
1265                 bh = wnd_map(wnd, iw);
1266                 if (IS_ERR(bh)) {
1267                         // TODO: error
1268                         prev_tail = 0;
1269                         wpos = 0;
1270                         continue;
1271                 }
1272
1273                 buf = (ulong *)bh->b_data;
1274
1275                 /* Scan range [wpos, eBits) */
1276                 fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
1277                                &b_pos, &b_len);
1278                 put_bh(bh);
1279                 if (fnd != MINUS_ONE_T)
1280                         goto found;
1281         }
1282
1283         if (b_len < prev_tail) {
1284                 /* The last fragment */
1285                 b_len = prev_tail;
1286                 b_pos = max_alloc - prev_tail;
1287         }
1288
1289         if (hint) {
1290                 /*
1291                  * We have scanned range [hint max_alloc)
1292                  * Prepare to scan range [0 hint + to_alloc)
1293                  */
1294                 size_t nextmax = hint + to_alloc;
1295
1296                 if (likely(nextmax >= hint) && nextmax < max_alloc)
1297                         max_alloc = nextmax;
1298                 hint = 0;
1299                 goto Again;
1300         }
1301
1302         if (!b_len)
1303                 goto no_space;
1304
1305         wnd->extent_max = b_len;
1306
1307         if (flags & BITMAP_FIND_FULL)
1308                 goto no_space;
1309
1310         fnd = b_pos;
1311         to_alloc = b_len;
1312
1313 found:
1314         if (flags & BITMAP_FIND_MARK_AS_USED) {
1315                 /* TODO optimize remove extent (pass 'e'?) */
1316                 if (wnd_set_used(wnd, fnd, to_alloc))
1317                         goto no_space;
1318         } else if (wnd->extent_max != MINUS_ONE_T &&
1319                    to_alloc > wnd->extent_max) {
1320                 wnd->extent_max = to_alloc;
1321         }
1322
1323         *allocated = fnd;
1324         return to_alloc;
1325
1326 no_space:
1327         return 0;
1328 }
1329
1330 /*
1331  * wnd_extend
1332  *
1333  * Extend bitmap ($MFT bitmap)
1334  */
1335 int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
1336 {
1337         int err;
1338         struct super_block *sb = wnd->sb;
1339         struct ntfs_sb_info *sbi = sb->s_fs_info;
1340         u32 blocksize = sb->s_blocksize;
1341         u32 wbits = blocksize * 8;
1342         u32 b0, new_last;
1343         size_t bits, iw, new_wnd;
1344         size_t old_bits = wnd->nbits;
1345         u16 *new_free;
1346
1347         if (new_bits <= old_bits)
1348                 return -EINVAL;
1349
1350         /* align to 8 byte boundary */
1351         new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
1352         new_last = new_bits & (wbits - 1);
1353         if (!new_last)
1354                 new_last = wbits;
1355
1356         if (new_wnd != wnd->nwnd) {
1357                 new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
1358                 if (!new_free)
1359                         return -ENOMEM;
1360
1361                 if (new_free != wnd->free_bits)
1362                         memcpy(new_free, wnd->free_bits,
1363                                wnd->nwnd * sizeof(short));
1364                 memset(new_free + wnd->nwnd, 0,
1365                        (new_wnd - wnd->nwnd) * sizeof(short));
1366                 kfree(wnd->free_bits);
1367                 wnd->free_bits = new_free;
1368         }
1369
1370         /* Zero bits [old_bits,new_bits) */
1371         bits = new_bits - old_bits;
1372         b0 = old_bits & (wbits - 1);
1373
1374         for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
1375                 u32 op;
1376                 size_t frb;
1377                 u64 vbo, lbo, bytes;
1378                 struct buffer_head *bh;
1379                 ulong *buf;
1380
1381                 if (iw + 1 == new_wnd)
1382                         wbits = new_last;
1383
1384                 op = b0 + bits > wbits ? wbits - b0 : bits;
1385                 vbo = (u64)iw * blocksize;
1386
1387                 err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
1388                 if (err)
1389                         break;
1390
1391                 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
1392                 if (!bh)
1393                         return -EIO;
1394
1395                 lock_buffer(bh);
1396                 buf = (ulong *)bh->b_data;
1397
1398                 __bitmap_clear(buf, b0, blocksize * 8 - b0);
1399                 frb = wbits - __bitmap_weight(buf, wbits);
1400                 wnd->total_zeroes += frb - wnd->free_bits[iw];
1401                 wnd->free_bits[iw] = frb;
1402
1403                 set_buffer_uptodate(bh);
1404                 mark_buffer_dirty(bh);
1405                 unlock_buffer(bh);
1406                 /*err = sync_dirty_buffer(bh);*/
1407
1408                 b0 = 0;
1409                 bits -= op;
1410         }
1411
1412         wnd->nbits = new_bits;
1413         wnd->nwnd = new_wnd;
1414         wnd->bits_last = new_last;
1415
1416         wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
1417
1418         return 0;
1419 }
1420
1421 /*
1422  * wnd_zone_set
1423  */
1424 void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
1425 {
1426         size_t zlen;
1427
1428         zlen = wnd->zone_end - wnd->zone_bit;
1429         if (zlen)
1430                 wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
1431
1432         if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
1433                 wnd_remove_free_ext(wnd, lcn, len);
1434
1435         wnd->zone_bit = lcn;
1436         wnd->zone_end = lcn + len;
1437 }
1438
1439 int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
1440 {
1441         int err = 0;
1442         struct super_block *sb = sbi->sb;
1443         struct wnd_bitmap *wnd = &sbi->used.bitmap;
1444         u32 wbits = 8 * sb->s_blocksize;
1445         CLST len = 0, lcn = 0, done = 0;
1446         CLST minlen = bytes_to_cluster(sbi, range->minlen);
1447         CLST lcn_from = bytes_to_cluster(sbi, range->start);
1448         size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
1449         u32 wbit = lcn_from & (wbits - 1);
1450         const ulong *buf;
1451         CLST lcn_to;
1452
1453         if (!minlen)
1454                 minlen = 1;
1455
1456         if (range->len == (u64)-1)
1457                 lcn_to = wnd->nbits;
1458         else
1459                 lcn_to = bytes_to_cluster(sbi, range->start + range->len);
1460
1461         down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
1462
1463         for (; iw < wnd->nbits; iw++, wbit = 0) {
1464                 CLST lcn_wnd = iw * wbits;
1465                 struct buffer_head *bh;
1466
1467                 if (lcn_wnd > lcn_to)
1468                         break;
1469
1470                 if (!wnd->free_bits[iw])
1471                         continue;
1472
1473                 if (iw + 1 == wnd->nwnd)
1474                         wbits = wnd->bits_last;
1475
1476                 if (lcn_wnd + wbits > lcn_to)
1477                         wbits = lcn_to - lcn_wnd;
1478
1479                 bh = wnd_map(wnd, iw);
1480                 if (IS_ERR(bh)) {
1481                         err = PTR_ERR(bh);
1482                         break;
1483                 }
1484
1485                 buf = (ulong *)bh->b_data;
1486
1487                 for (; wbit < wbits; wbit++) {
1488                         if (!test_bit(wbit, buf)) {
1489                                 if (!len)
1490                                         lcn = lcn_wnd + wbit;
1491                                 len += 1;
1492                                 continue;
1493                         }
1494                         if (len >= minlen) {
1495                                 err = ntfs_discard(sbi, lcn, len);
1496                                 if (err)
1497                                         goto out;
1498                                 done += len;
1499                         }
1500                         len = 0;
1501                 }
1502                 put_bh(bh);
1503         }
1504
1505         /* Process the last fragment */
1506         if (len >= minlen) {
1507                 err = ntfs_discard(sbi, lcn, len);
1508                 if (err)
1509                         goto out;
1510                 done += len;
1511         }
1512
1513 out:
1514         range->len = (u64)done << sbi->cluster_bits;
1515
1516         up_read(&wnd->rw_lock);
1517
1518         return err;
1519 }