lib/maple_tree.c
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Maple Tree implementation
4 * Copyright (c) 2018-2022 Oracle Corporation
5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6 * Matthew Wilcox <willy@infradead.org>
7 */
8
9/*
10 * DOC: Interesting implementation details of the Maple Tree
11 *
12 * Each node type has a number of slots for entries and a number of slots for
13 * pivots. In the case of dense nodes, the pivots are implied by the position
14 * and are simply the slot index + the minimum of the node.
15 *
16 * In regular B-Tree terms, pivots are called keys. The term pivot is used to
17 * indicate that the tree is specifying ranges. Pivots may appear in the
18 * subtree with an entry attached to the value, whereas keys are unique to a
19 * specific position of a B-tree. Pivot values are inclusive of the slot with
20 * the same index.
21 *
22 *
23 * The following illustrates the layout of a range64 node's slots and pivots.
24 *
25 *
26 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
27 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬
28 * │ │ │ │ │ │ │ │ └─ Implied maximum
29 * │ │ │ │ │ │ │ └─ Pivot 14
30 * │ │ │ │ │ │ └─ Pivot 13
31 * │ │ │ │ │ └─ Pivot 12
32 * │ │ │ │ └─ Pivot 11
33 * │ │ │ └─ Pivot 2
34 * │ │ └─ Pivot 1
35 * │ └─ Pivot 0
36 * └─ Implied minimum
37 *
38 * Slot contents:
39 * Internal (non-leaf) nodes contain pointers to other nodes.
40 * Leaf nodes contain entries.
41 *
42 * The location of interest is often referred to as an offset. All offsets have
43 * a slot, but the last offset has an implied pivot from the node above (or
44 * ULONG_MAX for the root node).
45 *
46 * Ranges complicate certain write activities. When modifying any of
47 * the B-tree variants, it is known that one entry will either be added or
48 * deleted. When modifying the Maple Tree, one store operation may overwrite
49 * the entire data set, or one half of the tree, or the middle half of the tree.
50 *
51 */
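/*
 * Editor's illustrative sketch (the values below are hypothetical, not from
 * the original documentation): a leaf covering 0 - 100 that stores entry A
 * for the range 10 - 19 and entry B for 20 - 45 could lay out as
 *
 *   slot:  | NULL |  A |  B | NULL | ...
 *   pivot: |   9  | 19 | 45 | (rest of the range up to the implied maximum)
 *
 * Each pivot is the inclusive upper bound of the slot with the same index;
 * the lower bound of a slot is the previous pivot + 1, or the node minimum
 * for slot 0.
 */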
52
53
54#include <linux/maple_tree.h>
55#include <linux/xarray.h>
56#include <linux/types.h>
57#include <linux/export.h>
58#include <linux/slab.h>
59#include <linux/limits.h>
60#include <asm/barrier.h>
61
62#define CREATE_TRACE_POINTS
63#include <trace/events/maple_tree.h>
64
65#define MA_ROOT_PARENT 1
66
67/*
68 * Maple state flags
69 * * MA_STATE_BULK - Bulk insert mode
70 * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
71 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
72 */
73#define MA_STATE_BULK 1
74#define MA_STATE_REBALANCE 2
75#define MA_STATE_PREALLOC 4
76
77#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
78#define ma_mnode_ptr(x) ((struct maple_node *)(x))
79#define ma_enode_ptr(x) ((struct maple_enode *)(x))
80static struct kmem_cache *maple_node_cache;
81
82#ifdef CONFIG_DEBUG_MAPLE_TREE
83static const unsigned long mt_max[] = {
84 [maple_dense] = MAPLE_NODE_SLOTS,
85 [maple_leaf_64] = ULONG_MAX,
86 [maple_range_64] = ULONG_MAX,
87 [maple_arange_64] = ULONG_MAX,
88};
89#define mt_node_max(x) mt_max[mte_node_type(x)]
90#endif
91
92static const unsigned char mt_slots[] = {
93 [maple_dense] = MAPLE_NODE_SLOTS,
94 [maple_leaf_64] = MAPLE_RANGE64_SLOTS,
95 [maple_range_64] = MAPLE_RANGE64_SLOTS,
96 [maple_arange_64] = MAPLE_ARANGE64_SLOTS,
97};
98#define mt_slot_count(x) mt_slots[mte_node_type(x)]
99
100static const unsigned char mt_pivots[] = {
101 [maple_dense] = 0,
102 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
103 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
104 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
105};
106#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
107
108static const unsigned char mt_min_slots[] = {
109 [maple_dense] = MAPLE_NODE_SLOTS / 2,
110 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
111 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
112 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
113};
114#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
115
116#define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2)
117#define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
118
119struct maple_big_node {
120 struct maple_pnode *parent;
121 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
122 union {
123 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
124 struct {
125 unsigned long padding[MAPLE_BIG_NODE_GAPS];
126 unsigned long gap[MAPLE_BIG_NODE_GAPS];
127 };
128 };
129 unsigned char b_end;
130 enum maple_type type;
131};
132
133/*
134 * The maple_subtree_state is used to build a tree to replace a segment of an
135 * existing tree in a more atomic way. Any walkers of the older tree will hit a
136 * dead node and restart on updates.
137 */
138struct maple_subtree_state {
139 struct ma_state *orig_l; /* Original left side of subtree */
140 struct ma_state *orig_r; /* Original right side of subtree */
141 struct ma_state *l; /* New left side of subtree */
142 struct ma_state *m; /* New middle of subtree (rare) */
143 struct ma_state *r; /* New right side of subtree */
144 struct ma_topiary *free; /* nodes to be freed */
145 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */
146 struct maple_big_node *bn;
147};
148
149#ifdef CONFIG_KASAN_STACK
150/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
151#define noinline_for_kasan noinline_for_stack
152#else
153#define noinline_for_kasan inline
154#endif
155
156/* Functions */
157static inline struct maple_node *mt_alloc_one(gfp_t gfp)
158{
159 return kmem_cache_alloc(maple_node_cache, gfp);
160}
161
162static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
163{
164 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
165}
166
167static inline void mt_free_bulk(size_t size, void __rcu **nodes)
168{
169 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
170}
171
172static void mt_free_rcu(struct rcu_head *head)
173{
174 struct maple_node *node = container_of(head, struct maple_node, rcu);
175
176 kmem_cache_free(maple_node_cache, node);
177}
178
179/*
180 * ma_free_rcu() - Use rcu callback to free a maple node
181 * @node: The node to free
182 *
183 * The maple tree uses the parent pointer to indicate this node is no longer in
184 * use and will be freed.
185 */
186static void ma_free_rcu(struct maple_node *node)
187{
188 WARN_ON(node->parent != ma_parent_ptr(node));
189 call_rcu(&node->rcu, mt_free_rcu);
190}
191
192static void mas_set_height(struct ma_state *mas)
193{
194 unsigned int new_flags = mas->tree->ma_flags;
195
196 new_flags &= ~MT_FLAGS_HEIGHT_MASK;
197 BUG_ON(mas->depth > MAPLE_HEIGHT_MAX);
198 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
199 mas->tree->ma_flags = new_flags;
200}
201
202static unsigned int mas_mt_height(struct ma_state *mas)
203{
204 return mt_height(mas->tree);
205}
206
207static inline enum maple_type mte_node_type(const struct maple_enode *entry)
208{
209 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
210 MAPLE_NODE_TYPE_MASK;
211}
212
213static inline bool ma_is_dense(const enum maple_type type)
214{
215 return type < maple_leaf_64;
216}
217
218static inline bool ma_is_leaf(const enum maple_type type)
219{
220 return type < maple_range_64;
221}
222
223static inline bool mte_is_leaf(const struct maple_enode *entry)
224{
225 return ma_is_leaf(mte_node_type(entry));
226}
227
228/*
229 * We also reserve values with the bottom two bits set to '10' which are
230 * below 4096
231 */
232static inline bool mt_is_reserved(const void *entry)
233{
234 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
235 xa_is_internal(entry);
236}
237
238static inline void mas_set_err(struct ma_state *mas, long err)
239{
240 mas->node = MA_ERROR(err);
241}
242
243static inline bool mas_is_ptr(struct ma_state *mas)
244{
245 return mas->node == MAS_ROOT;
246}
247
248static inline bool mas_is_start(struct ma_state *mas)
249{
250 return mas->node == MAS_START;
251}
252
253bool mas_is_err(struct ma_state *mas)
254{
255 return xa_is_err(mas->node);
256}
257
258static inline bool mas_searchable(struct ma_state *mas)
259{
260 if (mas_is_none(mas))
261 return false;
262
263 if (mas_is_ptr(mas))
264 return false;
265
266 return true;
267}
268
269static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
270{
271 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
272}
273
274/*
275 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
276 * @entry: The maple encoded node
277 *
278 * Return: a maple topiary pointer
279 */
280static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
281{
282 return (struct maple_topiary *)
283 ((unsigned long)entry & ~MAPLE_NODE_MASK);
284}
285
286/*
287 * mas_mn() - Get the maple state node.
288 * @mas: The maple state
289 *
290 * Return: the maple node (not encoded - bare pointer).
291 */
292static inline struct maple_node *mas_mn(const struct ma_state *mas)
293{
294 return mte_to_node(mas->node);
295}
296
297/*
298 * mte_set_node_dead() - Set a maple encoded node as dead.
299 * @mn: The maple encoded node.
300 */
301static inline void mte_set_node_dead(struct maple_enode *mn)
302{
303 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
304 smp_wmb(); /* Needed for RCU */
305}
306
307/* Bit 1 indicates the root is a node */
308#define MAPLE_ROOT_NODE 0x02
309/* maple_type stored bit 3-6 */
310#define MAPLE_ENODE_TYPE_SHIFT 0x03
311/* Bit 2 means a NULL somewhere below */
312#define MAPLE_ENODE_NULL 0x04
313
314static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
315 enum maple_type type)
316{
317 return (void *)((unsigned long)node |
318 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
319}
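/*
 * Worked example of the encoding above (editor's sketch, assuming the
 * enum maple_type ordering from maple_tree.h where maple_leaf_64 == 1):
 * mt_mk_node(node, maple_leaf_64) returns
 * node | (1 << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL, so the low byte
 * of the encoded pointer is 0x0c. mte_node_type() recovers the type by
 * shifting and masking, and mte_to_node() recovers the bare node pointer
 * by clearing MAPLE_NODE_MASK.
 */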
320
321static inline void *mte_mk_root(const struct maple_enode *node)
322{
323 return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
324}
325
326static inline void *mte_safe_root(const struct maple_enode *node)
327{
328 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
329}
330
331static inline void *mte_set_full(const struct maple_enode *node)
332{
333 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
334}
335
336static inline void *mte_clear_full(const struct maple_enode *node)
337{
338 return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
339}
340
341static inline bool mte_has_null(const struct maple_enode *node)
342{
343 return (unsigned long)node & MAPLE_ENODE_NULL;
344}
345
346static inline bool ma_is_root(struct maple_node *node)
347{
348 return ((unsigned long)node->parent & MA_ROOT_PARENT);
349}
350
351static inline bool mte_is_root(const struct maple_enode *node)
352{
353 return ma_is_root(mte_to_node(node));
354}
355
356static inline bool mas_is_root_limits(const struct ma_state *mas)
357{
358 return !mas->min && mas->max == ULONG_MAX;
359}
360
361static inline bool mt_is_alloc(struct maple_tree *mt)
362{
363 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
364}
365
366/*
367 * The Parent Pointer
368 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
369 * When storing 32 or 64 bit values, the offset can fit into 5 bits. The 16
370 * bit values need an extra bit to store the offset. This extra bit comes from
371 * a reuse of the last bit in the node type. This is possible by using bit 1 to
372 * indicate if bit 2 is part of the type or the slot.
373 *
374 * Note types:
375 * 0x??1 = Root
376 * 0x?00 = 16 bit nodes
377 * 0x010 = 32 bit nodes
378 * 0x110 = 64 bit nodes
379 *
380 * Slot size and alignment
381 * 0b??1 : Root
382 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7
383 * 0b010 : 32 bit values, type in 0-2, slot in 3-7
384 * 0b110 : 64 bit values, type in 0-2, slot in 3-7
385 */
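/*
 * Worked example (editor's illustration with a hypothetical slot number):
 * storing a child at slot 3 of a maple_range_64 parent sets the child's
 * parent pointer to
 * ((unsigned long)parent & ~MAPLE_NODE_MASK) | (3 << MAPLE_PARENT_SLOT_SHIFT)
 * | MAPLE_PARENT_RANGE64, so the low byte reads 0x1e. mte_parent_slot()
 * recovers 3 by masking and shifting that byte back, and mte_parent()
 * recovers the parent node by clearing MAPLE_NODE_MASK.
 */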
386
387#define MAPLE_PARENT_ROOT 0x01
388
389#define MAPLE_PARENT_SLOT_SHIFT 0x03
390#define MAPLE_PARENT_SLOT_MASK 0xF8
391
392#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
393#define MAPLE_PARENT_16B_SLOT_MASK 0xFC
394
395#define MAPLE_PARENT_RANGE64 0x06
396#define MAPLE_PARENT_RANGE32 0x04
397#define MAPLE_PARENT_NOT_RANGE16 0x02
398
399/*
400 * mte_parent_shift() - Get the parent shift for the slot storage.
401 * @parent: The parent pointer cast as an unsigned long
402 * Return: The shift into that pointer to the start of the slot
403 */
404static inline unsigned long mte_parent_shift(unsigned long parent)
405{
406 /* Note bit 1 == 0 means 16B */
407 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
408 return MAPLE_PARENT_SLOT_SHIFT;
409
410 return MAPLE_PARENT_16B_SLOT_SHIFT;
411}
412
413/*
414 * mte_parent_slot_mask() - Get the slot mask for the parent.
415 * @parent: The parent pointer cast as an unsigned long.
416 * Return: The slot mask for that parent.
417 */
418static inline unsigned long mte_parent_slot_mask(unsigned long parent)
419{
420 /* Note bit 1 == 0 means 16B */
421 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
422 return MAPLE_PARENT_SLOT_MASK;
423
424 return MAPLE_PARENT_16B_SLOT_MASK;
425}
426
427/*
428 * mte_parent_enum() - Return the maple_type of the parent from the stored
429 * parent type.
430 * @p_enode: The maple_enode to extract the parent's enum from
431 * @mt: The maple tree
432 * Return: The node->parent maple_type
433 */
434static inline
435enum maple_type mte_parent_enum(struct maple_enode *p_enode,
436 struct maple_tree *mt)
437{
438 unsigned long p_type;
439
440 p_type = (unsigned long)p_enode;
441 if (p_type & MAPLE_PARENT_ROOT)
442 return 0; /* Validated in the caller. */
443
444 p_type &= MAPLE_NODE_MASK;
445 p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
446
447 switch (p_type) {
448 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
449 if (mt_is_alloc(mt))
450 return maple_arange_64;
451 return maple_range_64;
452 }
453
454 return 0;
455}
456
457static inline
458enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
459{
460 return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree);
461}
462
463/*
464 * mte_set_parent() - Set the parent node and encode the slot
465 * @enode: The encoded maple node.
466 * @parent: The encoded maple node that is the parent of @enode.
467 * @slot: The slot that @enode resides in @parent.
468 *
469 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
470 * parent type.
471 */
472static inline
473void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
474 unsigned char slot)
475{
476 unsigned long val = (unsigned long)parent;
477 unsigned long shift;
478 unsigned long type;
479 enum maple_type p_type = mte_node_type(parent);
480
481 BUG_ON(p_type == maple_dense);
482 BUG_ON(p_type == maple_leaf_64);
483
484 switch (p_type) {
485 case maple_range_64:
486 case maple_arange_64:
487 shift = MAPLE_PARENT_SLOT_SHIFT;
488 type = MAPLE_PARENT_RANGE64;
489 break;
490 default:
491 case maple_dense:
492 case maple_leaf_64:
493 shift = type = 0;
494 break;
495 }
496
497 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
498 val |= (slot << shift) | type;
499 mte_to_node(enode)->parent = ma_parent_ptr(val);
500}
501
502/*
503 * mte_parent_slot() - get the parent slot of @enode.
504 * @enode: The encoded maple node.
505 *
506 * Return: The slot in the parent node where @enode resides.
507 */
508static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
509{
510 unsigned long val = (unsigned long)mte_to_node(enode)->parent;
511
512 if (val & MA_ROOT_PARENT)
513 return 0;
514
515 /*
516 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
517 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
518 */
519 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
520}
521
522/*
523 * mte_parent() - Get the parent of @node.
524 * @node: The encoded maple node.
525 *
526 * Return: The parent maple node.
527 */
528static inline struct maple_node *mte_parent(const struct maple_enode *enode)
529{
530 return (void *)((unsigned long)
531 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
532}
533
534/*
535 * ma_dead_node() - check if the @node is dead.
536 * @node: The maple node
537 *
538 * Return: true if dead, false otherwise.
539 */
540static inline bool ma_dead_node(const struct maple_node *node)
541{
542 struct maple_node *parent;
543
544 /* Do not reorder reads from the node prior to the parent check */
545 smp_rmb();
546 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
547 return (parent == node);
548}
549
550/*
551 * mte_dead_node() - check if the @enode is dead.
552 * @enode: The encoded maple node
553 *
554 * Return: true if dead, false otherwise.
555 */
556static inline bool mte_dead_node(const struct maple_enode *enode)
557{
558 struct maple_node *parent, *node;
559
560 node = mte_to_node(enode);
561 /* Do not reorder reads from the node prior to the parent check */
562 smp_rmb();
563 parent = mte_parent(enode);
564 return (parent == node);
565}
566
567/*
568 * mas_allocated() - Get the number of nodes allocated in a maple state.
569 * @mas: The maple state
570 *
571 * The ma_state alloc member is overloaded to hold a pointer to the first
572 * allocated node or to the number of requested nodes to allocate. If bit 0 is
573 * set, then the alloc contains the number of requested nodes. If there is an
574 * allocated node, then the total allocated nodes is in that node.
575 *
576 * Return: The total number of nodes allocated
577 */
578static inline unsigned long mas_allocated(const struct ma_state *mas)
579{
580 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
581 return 0;
582
583 return mas->alloc->total;
584}
585
586/*
587 * mas_set_alloc_req() - Set the requested number of allocations.
588 * @mas: the maple state
589 * @count: the number of allocations.
590 *
591 * The requested number of allocations is either in the first allocated node,
592 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
593 * no allocated node. Set the request either in the node or do the necessary
594 * encoding to store in @mas->alloc directly.
595 */
596static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
597{
598 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
599 if (!count)
600 mas->alloc = NULL;
601 else
602 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
603 return;
604 }
605
606 mas->alloc->request_count = count;
607}
608
609/*
610 * mas_alloc_req() - get the requested number of allocations.
611 * @mas: The maple state
612 *
613 * The alloc count is either stored directly in @mas, or in
614 * @mas->alloc->request_count if there is at least one node allocated. Decode
615 * the request count if it's stored directly in @mas->alloc.
616 *
617 * Return: The allocation request count.
618 */
619static inline unsigned int mas_alloc_req(const struct ma_state *mas)
620{
621 if ((unsigned long)mas->alloc & 0x1)
622 return (unsigned long)(mas->alloc) >> 1;
623 else if (mas->alloc)
624 return mas->alloc->request_count;
625 return 0;
626}
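/*
 * Example of the overloaded @mas->alloc encoding (editor's sketch with
 * hypothetical counts): after mas_set_alloc_req(mas, 3) on a state with no
 * allocated nodes, mas->alloc holds the odd value (3 << 1) | 1, so
 * mas_allocated() reports 0 while mas_alloc_req() reports 3. Once a node
 * has been allocated, both counts live in mas->alloc->request_count and
 * mas->alloc->total instead.
 */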
627
628/*
629 * ma_pivots() - Get a pointer to the maple node pivots.
630 * @node - the maple node
631 * @type - the node type
632 *
633 * In the event of a dead node, this array may be %NULL
634 *
635 * Return: A pointer to the maple node pivots
636 */
637static inline unsigned long *ma_pivots(struct maple_node *node,
638 enum maple_type type)
639{
640 switch (type) {
641 case maple_arange_64:
642 return node->ma64.pivot;
643 case maple_range_64:
644 case maple_leaf_64:
645 return node->mr64.pivot;
646 case maple_dense:
647 return NULL;
648 }
649 return NULL;
650}
651
652/*
653 * ma_gaps() - Get a pointer to the maple node gaps.
654 * @node - the maple node
655 * @type - the node type
656 *
657 * Return: A pointer to the maple node gaps
658 */
659static inline unsigned long *ma_gaps(struct maple_node *node,
660 enum maple_type type)
661{
662 switch (type) {
663 case maple_arange_64:
664 return node->ma64.gap;
665 case maple_range_64:
666 case maple_leaf_64:
667 case maple_dense:
668 return NULL;
669 }
670 return NULL;
671}
672
673/*
674 * mte_pivot() - Get the pivot at @piv of the maple encoded node.
675 * @mn: The maple encoded node.
676 * @piv: The pivot.
677 *
678 * Return: the pivot at @piv of @mn.
679 */
680static inline unsigned long mte_pivot(const struct maple_enode *mn,
681 unsigned char piv)
682{
683 struct maple_node *node = mte_to_node(mn);
684 enum maple_type type = mte_node_type(mn);
685
686 if (piv >= mt_pivots[type]) {
687 WARN_ON(1);
688 return 0;
689 }
690 switch (type) {
691 case maple_arange_64:
692 return node->ma64.pivot[piv];
693 case maple_range_64:
694 case maple_leaf_64:
695 return node->mr64.pivot[piv];
696 case maple_dense:
697 return 0;
698 }
699 return 0;
700}
701
702/*
703 * mas_safe_pivot() - get the pivot at @piv or mas->max.
704 * @mas: The maple state
705 * @pivots: The pointer to the maple node pivots
706 * @piv: The pivot to fetch
707 * @type: The maple node type
708 *
709 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
710 * otherwise.
711 */
712static inline unsigned long
713mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
714 unsigned char piv, enum maple_type type)
715{
716 if (piv >= mt_pivots[type])
717 return mas->max;
718
719 return pivots[piv];
720}
721
722/*
723 * mas_safe_min() - Return the minimum for a given offset.
724 * @mas: The maple state
725 * @pivots: The pointer to the maple node pivots
726 * @offset: The offset into the pivot array
727 *
728 * Return: The minimum range value that is contained in @offset.
729 */
730static inline unsigned long
731mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
732{
733 if (likely(offset))
734 return pivots[offset - 1] + 1;
735
736 return mas->min;
737}
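/*
 * Small worked example (editor's sketch, hypothetical pivots): with
 * pivots {9, 19, 45} and mas->min == 0, offset 0 covers 0 - 9, offset 1
 * covers 10 - 19 and offset 2 covers 20 - 45, so
 * mas_safe_min(mas, pivots, 2) returns pivots[1] + 1 == 20.
 */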
738
739/*
740 * mas_logical_pivot() - Get the logical pivot of a given offset.
741 * @mas: The maple state
742 * @pivots: The pointer to the maple node pivots
743 * @offset: The offset into the pivot array
744 * @type: The maple node type
745 *
746 * When there is no value at a pivot (beyond the end of the data), then the
747 * pivot is actually @mas->max.
748 *
749 * Return: the logical pivot of a given @offset.
750 */
751static inline unsigned long
752mas_logical_pivot(struct ma_state *mas, unsigned long *pivots,
753 unsigned char offset, enum maple_type type)
754{
755 unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type);
756
757 if (likely(lpiv))
758 return lpiv;
759
760 if (likely(offset))
761 return mas->max;
762
763 return lpiv;
764}
765
766/*
767 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
768 * @mn: The encoded maple node
769 * @piv: The pivot offset
770 * @val: The value of the pivot
771 */
772static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
773 unsigned long val)
774{
775 struct maple_node *node = mte_to_node(mn);
776 enum maple_type type = mte_node_type(mn);
777
778 BUG_ON(piv >= mt_pivots[type]);
779 switch (type) {
780 default:
781 case maple_range_64:
782 case maple_leaf_64:
783 node->mr64.pivot[piv] = val;
784 break;
785 case maple_arange_64:
786 node->ma64.pivot[piv] = val;
787 break;
788 case maple_dense:
789 break;
790 }
791
792}
793
794/*
795 * ma_slots() - Get a pointer to the maple node slots.
796 * @mn: The maple node
797 * @mt: The maple node type
798 *
799 * Return: A pointer to the maple node slots
800 */
801static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
802{
803 switch (mt) {
804 default:
805 case maple_arange_64:
806 return mn->ma64.slot;
807 case maple_range_64:
808 case maple_leaf_64:
809 return mn->mr64.slot;
810 case maple_dense:
811 return mn->slot;
812 }
813}
814
815static inline bool mt_locked(const struct maple_tree *mt)
816{
817 return mt_external_lock(mt) ? mt_lock_is_held(mt) :
818 lockdep_is_held(&mt->ma_lock);
819}
820
821static inline void *mt_slot(const struct maple_tree *mt,
822 void __rcu **slots, unsigned char offset)
823{
824 return rcu_dereference_check(slots[offset], mt_locked(mt));
825}
826
827static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
828 unsigned char offset)
829{
830 return rcu_dereference_protected(slots[offset], mt_locked(mt));
831}
832/*
833 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
834 * @mas: The maple state
835 * @slots: The pointer to the slots
836 * @offset: The offset into the slots array to fetch
837 *
838 * Return: The entry stored in @slots at the @offset.
839 */
840static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
841 unsigned char offset)
842{
843 return mt_slot_locked(mas->tree, slots, offset);
844}
845
846/*
847 * mas_slot() - Get the slot value when not holding the maple tree lock.
848 * @mas: The maple state
849 * @slots: The pointer to the slots
850 * @offset: The offset into the slots array to fetch
851 *
852 * Return: The entry stored in @slots at the @offset
853 */
854static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
855 unsigned char offset)
856{
857 return mt_slot(mas->tree, slots, offset);
858}
859
860/*
861 * mas_root() - Get the maple tree root.
862 * @mas: The maple state.
863 *
864 * Return: The pointer to the root of the tree
865 */
866static inline void *mas_root(struct ma_state *mas)
867{
868 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
869}
870
871static inline void *mt_root_locked(struct maple_tree *mt)
872{
873 return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
874}
875
876/*
877 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
878 * @mas: The maple state.
879 *
880 * Return: The pointer to the root of the tree
881 */
882static inline void *mas_root_locked(struct ma_state *mas)
883{
884 return mt_root_locked(mas->tree);
885}
886
887static inline struct maple_metadata *ma_meta(struct maple_node *mn,
888 enum maple_type mt)
889{
890 switch (mt) {
891 case maple_arange_64:
892 return &mn->ma64.meta;
893 default:
894 return &mn->mr64.meta;
895 }
896}
897
898/*
899 * ma_set_meta() - Set the metadata information of a node.
900 * @mn: The maple node
901 * @mt: The maple node type
902 * @offset: The offset of the highest sub-gap in this node.
903 * @end: The end of the data in this node.
904 */
905static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
906 unsigned char offset, unsigned char end)
907{
908 struct maple_metadata *meta = ma_meta(mn, mt);
909
910 meta->gap = offset;
911 meta->end = end;
912}
913
914/*
915 * mt_clear_meta() - clear the metadata information of a node, if it exists
916 * @mt: The maple tree
917 * @mn: The maple node
918 * @type: The maple node type
921 */
922static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
923 enum maple_type type)
924{
925 struct maple_metadata *meta;
926 unsigned long *pivots;
927 void __rcu **slots;
928 void *next;
929
930 switch (type) {
931 case maple_range_64:
932 pivots = mn->mr64.pivot;
933 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
934 slots = mn->mr64.slot;
935 next = mt_slot_locked(mt, slots,
936 MAPLE_RANGE64_SLOTS - 1);
937 if (unlikely((mte_to_node(next) &&
938 mte_node_type(next))))
939 return; /* no metadata, could be node */
940 }
941 fallthrough;
942 case maple_arange_64:
943 meta = ma_meta(mn, type);
944 break;
945 default:
946 return;
947 }
948
949 meta->gap = 0;
950 meta->end = 0;
951}
952
953/*
954 * ma_meta_end() - Get the data end of a node from the metadata
955 * @mn: The maple node
956 * @mt: The maple node type
957 */
958static inline unsigned char ma_meta_end(struct maple_node *mn,
959 enum maple_type mt)
960{
961 struct maple_metadata *meta = ma_meta(mn, mt);
962
963 return meta->end;
964}
965
966/*
967 * ma_meta_gap() - Get the largest gap location of a node from the metadata
968 * @mn: The maple node
969 * @mt: The maple node type
970 */
971static inline unsigned char ma_meta_gap(struct maple_node *mn,
972 enum maple_type mt)
973{
974 BUG_ON(mt != maple_arange_64);
975
976 return mn->ma64.meta.gap;
977}
978
979/*
980 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
981 * @mn: The maple node
982 * @mt: The maple node type
983 * @offset: The location of the largest gap.
984 */
985static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
986 unsigned char offset)
987{
988
989 struct maple_metadata *meta = ma_meta(mn, mt);
990
991 meta->gap = offset;
992}
993
994/*
995 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
996 * @mat - the ma_topiary, a linked list of dead nodes.
997 * @dead_enode - the node to be marked as dead and added to the tail of the list
998 *
999 * Add the @dead_enode to the linked list in @mat.
1000 */
1001static inline void mat_add(struct ma_topiary *mat,
1002 struct maple_enode *dead_enode)
1003{
1004 mte_set_node_dead(dead_enode);
1005 mte_to_mat(dead_enode)->next = NULL;
1006 if (!mat->tail) {
1007 mat->tail = mat->head = dead_enode;
1008 return;
1009 }
1010
1011 mte_to_mat(mat->tail)->next = dead_enode;
1012 mat->tail = dead_enode;
1013}
1014
1015static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
1016static inline void mas_free(struct ma_state *mas, struct maple_enode *used);
1017
1018/*
1019 * mas_mat_free() - Free all nodes in a dead list.
1020 * @mas - the maple state
1021 * @mat - the ma_topiary linked list of dead nodes to free.
1022 *
1023 * Free walk a dead list.
1024 */
1025static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat)
1026{
1027 struct maple_enode *next;
1028
1029 while (mat->head) {
1030 next = mte_to_mat(mat->head)->next;
1031 mas_free(mas, mat->head);
1032 mat->head = next;
1033 }
1034}
1035
1036/*
1037 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
1038 * @mas - the maple state
1039 * @mat - the ma_topiary linked list of dead nodes to free.
1040 *
1041 * Destroy walk a dead list.
1042 */
1043static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
1044{
1045 struct maple_enode *next;
1046
1047 while (mat->head) {
1048 next = mte_to_mat(mat->head)->next;
1049 mte_destroy_walk(mat->head, mat->mtree);
1050 mat->head = next;
1051 }
1052}
1053/*
1054 * mas_descend() - Descend into the slot stored in the ma_state.
1055 * @mas - the maple state.
1056 *
1057 * Note: Not RCU safe, only use in write side or debug code.
1058 */
1059static inline void mas_descend(struct ma_state *mas)
1060{
1061 enum maple_type type;
1062 unsigned long *pivots;
1063 struct maple_node *node;
1064 void __rcu **slots;
1065
1066 node = mas_mn(mas);
1067 type = mte_node_type(mas->node);
1068 pivots = ma_pivots(node, type);
1069 slots = ma_slots(node, type);
1070
1071 if (mas->offset)
1072 mas->min = pivots[mas->offset - 1] + 1;
1073 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
1074 mas->node = mas_slot(mas, slots, mas->offset);
1075}
1076
1077/*
1078 * mte_set_gap() - Set a maple node gap.
1079 * @mn: The encoded maple node
1080 * @gap: The offset of the gap to set
1081 * @val: The gap value
1082 */
1083static inline void mte_set_gap(const struct maple_enode *mn,
1084 unsigned char gap, unsigned long val)
1085{
1086 switch (mte_node_type(mn)) {
1087 default:
1088 break;
1089 case maple_arange_64:
1090 mte_to_node(mn)->ma64.gap[gap] = val;
1091 break;
1092 }
1093}
1094
1095/*
1096 * mas_ascend() - Walk up a level of the tree.
1097 * @mas: The maple state
1098 *
1099 * Sets the @mas->max and @mas->min to the correct values when walking up. This
1100 * may cause several levels of walking up to find the correct min and max.
1101 * May find a dead node which will cause a premature return.
1102 * Return: 1 on dead node, 0 otherwise
1103 */
1104static int mas_ascend(struct ma_state *mas)
1105{
1106 struct maple_enode *p_enode; /* parent enode. */
1107 struct maple_enode *a_enode; /* ancestor enode. */
1108 struct maple_node *a_node; /* ancestor node. */
1109 struct maple_node *p_node; /* parent node. */
1110 unsigned char a_slot;
1111 enum maple_type a_type;
1112 unsigned long min, max;
1113 unsigned long *pivots;
1114 unsigned char offset;
1115 bool set_max = false, set_min = false;
1116
1117 a_node = mas_mn(mas);
1118 if (ma_is_root(a_node)) {
1119 mas->offset = 0;
1120 return 0;
1121 }
1122
1123 p_node = mte_parent(mas->node);
1124 if (unlikely(a_node == p_node))
1125 return 1;
1126 a_type = mas_parent_enum(mas, mas->node);
1127 offset = mte_parent_slot(mas->node);
1128 a_enode = mt_mk_node(p_node, a_type);
1129
1130 /* Check to make sure all parent information is still accurate */
1131 if (p_node != mte_parent(mas->node))
1132 return 1;
1133
1134 mas->node = a_enode;
1135 mas->offset = offset;
1136
1137 if (mte_is_root(a_enode)) {
1138 mas->max = ULONG_MAX;
1139 mas->min = 0;
1140 return 0;
1141 }
1142
1143 min = 0;
1144 max = ULONG_MAX;
1145 do {
1146 p_enode = a_enode;
1147 a_type = mas_parent_enum(mas, p_enode);
1148 a_node = mte_parent(p_enode);
1149 a_slot = mte_parent_slot(p_enode);
1150 a_enode = mt_mk_node(a_node, a_type);
1151 pivots = ma_pivots(a_node, a_type);
1152
1153 if (unlikely(ma_dead_node(a_node)))
1154 return 1;
1155
1156 if (!set_min && a_slot) {
1157 set_min = true;
1158 min = pivots[a_slot - 1] + 1;
1159 }
1160
1161 if (!set_max && a_slot < mt_pivots[a_type]) {
1162 set_max = true;
1163 max = pivots[a_slot];
1164 }
1165
1166 if (unlikely(ma_dead_node(a_node)))
1167 return 1;
1168
1169 if (unlikely(ma_is_root(a_node)))
1170 break;
1171
1172 } while (!set_min || !set_max);
1173
1174 mas->max = max;
1175 mas->min = min;
1176 return 0;
1177}
1178
1179/*
1180 * mas_pop_node() - Get a previously allocated maple node from the maple state.
1181 * @mas: The maple state
1182 *
1183 * Return: A pointer to a maple node.
1184 */
1185static inline struct maple_node *mas_pop_node(struct ma_state *mas)
1186{
1187 struct maple_alloc *ret, *node = mas->alloc;
1188 unsigned long total = mas_allocated(mas);
1189 unsigned int req = mas_alloc_req(mas);
1190
1191 /* nothing or a request pending. */
1192 if (WARN_ON(!total))
1193 return NULL;
1194
1195 if (total == 1) {
1196 /* single allocation in this ma_state */
1197 mas->alloc = NULL;
1198 ret = node;
1199 goto single_node;
1200 }
1201
1202 if (node->node_count == 1) {
1203 /* Single allocation in this node. */
1204 mas->alloc = node->slot[0];
1205 mas->alloc->total = node->total - 1;
1206 ret = node;
1207 goto new_head;
1208 }
1209 node->total--;
1210 ret = node->slot[--node->node_count];
1211 node->slot[node->node_count] = NULL;
1212
1213single_node:
1214new_head:
1215 if (req) {
1216 req++;
1217 mas_set_alloc_req(mas, req);
1218 }
1219
1220 memset(ret, 0, sizeof(*ret));
1221 return (struct maple_node *)ret;
1222}
1223
1224/*
1225 * mas_push_node() - Push a node back on the maple state allocation.
1226 * @mas: The maple state
1227 * @used: The used maple node
1228 *
1229 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and
1230 * requested node count as necessary.
1231 */
1232static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
1233{
1234 struct maple_alloc *reuse = (struct maple_alloc *)used;
1235 struct maple_alloc *head = mas->alloc;
1236 unsigned long count;
1237 unsigned int requested = mas_alloc_req(mas);
1238
1239 count = mas_allocated(mas);
1240
1241 reuse->request_count = 0;
1242 reuse->node_count = 0;
1243 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
1244 head->slot[head->node_count++] = reuse;
1245 head->total++;
1246 goto done;
1247 }
1248
1249 reuse->total = 1;
1250 if ((head) && !((unsigned long)head & 0x1)) {
1251 reuse->slot[0] = head;
1252 reuse->node_count = 1;
1253 reuse->total += head->total;
1254 }
1255
1256 mas->alloc = reuse;
1257done:
1258 if (requested > 1)
1259 mas_set_alloc_req(mas, requested - 1);
1260}
1261
1262/*
1263 * mas_alloc_nodes() - Allocate nodes into a maple state
1264 * @mas: The maple state
1265 * @gfp: The GFP Flags
1266 */
1267static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1268{
1269 struct maple_alloc *node;
1270 unsigned long allocated = mas_allocated(mas);
1271 unsigned int requested = mas_alloc_req(mas);
1272 unsigned int count;
1273 void **slots = NULL;
1274 unsigned int max_req = 0;
1275
1276 if (!requested)
1277 return;
1278
1279 mas_set_alloc_req(mas, 0);
1280 if (mas->mas_flags & MA_STATE_PREALLOC) {
1281 if (allocated)
1282 return;
1283 WARN_ON(!allocated);
1284 }
1285
1286 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
1287 node = (struct maple_alloc *)mt_alloc_one(gfp);
1288 if (!node)
1289 goto nomem_one;
1290
1291 if (allocated) {
1292 node->slot[0] = mas->alloc;
1293 node->node_count = 1;
1294 } else {
1295 node->node_count = 0;
1296 }
1297
1298 mas->alloc = node;
1299 node->total = ++allocated;
1300 requested--;
1301 }
1302
1303 node = mas->alloc;
1304 node->request_count = 0;
1305 while (requested) {
1306 max_req = MAPLE_ALLOC_SLOTS - node->node_count;
1307 slots = (void **)&node->slot[node->node_count];
1308 max_req = min(requested, max_req);
1309 count = mt_alloc_bulk(gfp, max_req, slots);
1310 if (!count)
1311 goto nomem_bulk;
1312
1313 if (node->node_count == 0) {
1314 node->slot[0]->node_count = 0;
1315 node->slot[0]->request_count = 0;
1316 }
1317
1318 node->node_count += count;
1319 allocated += count;
1320 node = node->slot[0];
1321 requested -= count;
1322 }
1323 mas->alloc->total = allocated;
1324 return;
1325
1326nomem_bulk:
1327 /* Clean up potential freed allocations on bulk failure */
1328 memset(slots, 0, max_req * sizeof(unsigned long));
1329nomem_one:
1330 mas_set_alloc_req(mas, requested);
1331 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
1332 mas->alloc->total = allocated;
1333 mas_set_err(mas, -ENOMEM);
1334}
1335
1336/*
1337 * mas_free() - Free an encoded maple node
1338 * @mas: The maple state
1339 * @used: The encoded maple node to free.
1340 *
1341 * Uses rcu free if necessary, pushes @used back on the maple state allocations
1342 * otherwise.
1343 */
1344static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1345{
1346 struct maple_node *tmp = mte_to_node(used);
1347
1348 if (mt_in_rcu(mas->tree))
1349 ma_free_rcu(tmp);
1350 else
1351 mas_push_node(mas, tmp);
1352}
1353
1354/*
1355 * mas_node_count_gfp() - Check if enough nodes are allocated and request more
1356 * if there are not enough nodes.
1357 * @mas: The maple state
1358 * @count: The number of nodes needed
1359 * @gfp: the gfp flags
1360 */
1361static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
1362{
1363 unsigned long allocated = mas_allocated(mas);
1364
1365 if (allocated < count) {
1366 mas_set_alloc_req(mas, count - allocated);
1367 mas_alloc_nodes(mas, gfp);
1368 }
1369}
1370
1371/*
1372 * mas_node_count() - Check if enough nodes are allocated and request more if
1373 * there are not enough nodes.
1374 * @mas: The maple state
1375 * @count: The number of nodes needed
1376 *
1377 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
1378 */
1379static void mas_node_count(struct ma_state *mas, int count)
1380{
1381 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
1382}
1383
1384/*
1385 * mas_start() - Sets up maple state for operations.
1386 * @mas: The maple state.
1387 *
1388 * If mas->node == MAS_START, then set the min, max and depth to
1389 * defaults.
1390 *
1391 * Return:
1392 * - If mas->node is an error or not MAS_START, return NULL.
1393 * - If it's an empty tree: NULL & mas->node == MAS_NONE
1394 * - If it's a single entry: The entry & mas->node == MAS_ROOT
1395 * - If it's a tree: NULL & mas->node == safe root node.
1396 */
1397static inline struct maple_enode *mas_start(struct ma_state *mas)
1398{
1399 if (likely(mas_is_start(mas))) {
1400 struct maple_enode *root;
1401
1402 mas->min = 0;
1403 mas->max = ULONG_MAX;
1404 mas->depth = 0;
1405
1406retry:
1407 root = mas_root(mas);
1408 /* Tree with nodes */
1409 if (likely(xa_is_node(root))) {
1410 mas->depth = 1;
1411 mas->node = mte_safe_root(root);
1412 mas->offset = 0;
1413 if (mte_dead_node(mas->node))
1414 goto retry;
1415
1416 return NULL;
1417 }
1418
1419 /* empty tree */
1420 if (unlikely(!root)) {
1421 mas->node = MAS_NONE;
1422 mas->offset = MAPLE_NODE_SLOTS;
1423 return NULL;
1424 }
1425
1426 /* Single entry tree */
1427 mas->node = MAS_ROOT;
1428 mas->offset = MAPLE_NODE_SLOTS;
1429
1430 /* Single entry tree. */
1431 if (mas->index > 0)
1432 return NULL;
1433
1434 return root;
1435 }
1436
1437 return NULL;
1438}
1439
1440/*
1441 * ma_data_end() - Find the end of the data in a node.
1442 * @node: The maple node
1443 * @type: The maple node type
1444 * @pivots: The array of pivots in the node
1445 * @max: The maximum value in the node
1446 *
1447 * Uses metadata to find the end of the data when possible.
1448 * Return: The zero indexed last slot with data (may be null).
1449 */
1450static inline unsigned char ma_data_end(struct maple_node *node,
1451 enum maple_type type,
1452 unsigned long *pivots,
1453 unsigned long max)
1454{
1455 unsigned char offset;
1456
1457 if (!pivots)
1458 return 0;
1459
1460 if (type == maple_arange_64)
1461 return ma_meta_end(node, type);
1462
1463 offset = mt_pivots[type] - 1;
1464 if (likely(!pivots[offset]))
1465 return ma_meta_end(node, type);
1466
1467 if (likely(pivots[offset] == max))
1468 return offset;
1469
1470 return mt_pivots[type];
1471}
1472
1473/*
1474 * mas_data_end() - Find the end of the data (slot).
1475 * @mas: the maple state
1476 *
1477 * This method is optimized to check the metadata of a node if the node type
1478 * supports data end metadata.
1479 *
1480 * Return: The zero indexed last slot with data (may be null).
1481 */
1482static inline unsigned char mas_data_end(struct ma_state *mas)
1483{
1484 enum maple_type type;
1485 struct maple_node *node;
1486 unsigned char offset;
1487 unsigned long *pivots;
1488
1489 type = mte_node_type(mas->node);
1490 node = mas_mn(mas);
1491 if (type == maple_arange_64)
1492 return ma_meta_end(node, type);
1493
1494 pivots = ma_pivots(node, type);
1495 if (unlikely(ma_dead_node(node)))
1496 return 0;
1497
1498 offset = mt_pivots[type] - 1;
1499 if (likely(!pivots[offset]))
1500 return ma_meta_end(node, type);
1501
1502 if (likely(pivots[offset] == mas->max))
1503 return offset;
1504
1505 return mt_pivots[type];
1506}
1507
1508/*
1509 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1510 * @mas - the maple state
1511 *
1512 * Return: The maximum gap in the leaf.
1513 */
1514static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1515{
1516 enum maple_type mt;
1517 unsigned long pstart, gap, max_gap;
1518 struct maple_node *mn;
1519 unsigned long *pivots;
1520 void __rcu **slots;
1521 unsigned char i;
1522 unsigned char max_piv;
1523
1524 mt = mte_node_type(mas->node);
1525 mn = mas_mn(mas);
1526 slots = ma_slots(mn, mt);
1527 max_gap = 0;
1528 if (unlikely(ma_is_dense(mt))) {
1529 gap = 0;
1530 for (i = 0; i < mt_slots[mt]; i++) {
1531 if (slots[i]) {
1532 if (gap > max_gap)
1533 max_gap = gap;
1534 gap = 0;
1535 } else {
1536 gap++;
1537 }
1538 }
1539 if (gap > max_gap)
1540 max_gap = gap;
1541 return max_gap;
1542 }
1543
1544 /*
1545 * Checking the first implied pivot optimizes the loop below, and slot 1 may
1546 * be skipped if there is a gap in slot 0.
1547 */
1548 pivots = ma_pivots(mn, mt);
1549 if (likely(!slots[0])) {
1550 max_gap = pivots[0] - mas->min + 1;
1551 i = 2;
1552 } else {
1553 i = 1;
1554 }
1555
1556 /* reduce max_piv as the special case is checked before the loop */
1557 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1558 /*
1559 * Check end implied pivot which can only be a gap on the right most
1560 * node.
1561 */
1562 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1563 gap = ULONG_MAX - pivots[max_piv];
1564 if (gap > max_gap)
1565 max_gap = gap;
1566 }
1567
1568 for (; i <= max_piv; i++) {
1569 /* data == no gap. */
1570 if (likely(slots[i]))
1571 continue;
1572
1573 pstart = pivots[i - 1];
1574 gap = pivots[i] - pstart;
1575 if (gap > max_gap)
1576 max_gap = gap;
1577
1578 /* There cannot be two gaps in a row. */
1579 i++;
1580 }
1581 return max_gap;
1582}
1583
1584/*
1585 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1586 * @node: The maple node
1587 * @gaps: The pointer to the gaps
1588 * @mt: The maple node type
1589 * @*off: Pointer to store the offset location of the gap.
1590 *
1591 * Uses the metadata data end to scan backwards across set gaps.
1592 *
1593 * Return: The maximum gap value
1594 */
1595static inline unsigned long
1596ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1597 unsigned char *off)
1598{
1599 unsigned char offset, i;
1600 unsigned long max_gap = 0;
1601
1602 i = offset = ma_meta_end(node, mt);
1603 do {
1604 if (gaps[i] > max_gap) {
1605 max_gap = gaps[i];
1606 offset = i;
1607 }
1608 } while (i--);
1609
1610 *off = offset;
1611 return max_gap;
1612}
1613
1614/*
1615 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1616 * @mas: The maple state.
1617 *
1618 * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap.
1619 *
1620 * Return: The gap value.
1621 */
1622static inline unsigned long mas_max_gap(struct ma_state *mas)
1623{
1624 unsigned long *gaps;
1625 unsigned char offset;
1626 enum maple_type mt;
1627 struct maple_node *node;
1628
1629 mt = mte_node_type(mas->node);
1630 if (ma_is_leaf(mt))
1631 return mas_leaf_max_gap(mas);
1632
1633 node = mas_mn(mas);
1634 offset = ma_meta_gap(node, mt);
1635 if (offset == MAPLE_ARANGE64_META_MAX)
1636 return 0;
1637
1638 gaps = ma_gaps(node, mt);
1639 return gaps[offset];
1640}
1641
1642/*
1643 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1644 * @mas: The maple state
1645 * @offset: The gap offset in the parent to set
1646 * @new: The new gap value.
1647 *
1648 * Set the parent gap then continue to set the gap upwards, using the metadata
1649 * of the parent to see if it is necessary to check the node above.
1650 */
1651static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1652 unsigned long new)
1653{
1654 unsigned long meta_gap = 0;
1655 struct maple_node *pnode;
1656 struct maple_enode *penode;
1657 unsigned long *pgaps;
1658 unsigned char meta_offset;
1659 enum maple_type pmt;
1660
1661 pnode = mte_parent(mas->node);
1662 pmt = mas_parent_enum(mas, mas->node);
1663 penode = mt_mk_node(pnode, pmt);
1664 pgaps = ma_gaps(pnode, pmt);
1665
1666ascend:
1667 meta_offset = ma_meta_gap(pnode, pmt);
1668 if (meta_offset == MAPLE_ARANGE64_META_MAX)
1669 meta_gap = 0;
1670 else
1671 meta_gap = pgaps[meta_offset];
1672
1673 pgaps[offset] = new;
1674
1675 if (meta_gap == new)
1676 return;
1677
1678 if (offset != meta_offset) {
1679 if (meta_gap > new)
1680 return;
1681
1682 ma_set_meta_gap(pnode, pmt, offset);
1683 } else if (new < meta_gap) {
1684 meta_offset = 15;
1685 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1686 ma_set_meta_gap(pnode, pmt, meta_offset);
1687 }
1688
1689 if (ma_is_root(pnode))
1690 return;
1691
1692 /* Go to the parent node. */
1693 pnode = mte_parent(penode);
1694 pmt = mas_parent_enum(mas, penode);
1695 pgaps = ma_gaps(pnode, pmt);
1696 offset = mte_parent_slot(penode);
1697 penode = mt_mk_node(pnode, pmt);
1698 goto ascend;
1699}
1700
1701/*
1702 * mas_update_gap() - Update a node's gaps and propagate up if necessary.
1703 * @mas - the maple state.
1704 */
1705static inline void mas_update_gap(struct ma_state *mas)
1706{
1707 unsigned char pslot;
1708 unsigned long p_gap;
1709 unsigned long max_gap;
1710
1711 if (!mt_is_alloc(mas->tree))
1712 return;
1713
1714 if (mte_is_root(mas->node))
1715 return;
1716
1717 max_gap = mas_max_gap(mas);
1718
1719 pslot = mte_parent_slot(mas->node);
1720 p_gap = ma_gaps(mte_parent(mas->node),
1721 mas_parent_enum(mas, mas->node))[pslot];
1722
1723 if (p_gap != max_gap)
1724 mas_parent_gap(mas, pslot, max_gap);
1725}
1726
1727/*
1728 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1729 * @parent with the slot encoded.
1730 * @mas - the maple state (for the tree)
1731 * @parent - the maple encoded node containing the children.
1732 */
1733static inline void mas_adopt_children(struct ma_state *mas,
1734 struct maple_enode *parent)
1735{
1736 enum maple_type type = mte_node_type(parent);
1737 struct maple_node *node = mas_mn(mas);
1738 void __rcu **slots = ma_slots(node, type);
1739 unsigned long *pivots = ma_pivots(node, type);
1740 struct maple_enode *child;
1741 unsigned char offset;
1742
1743 offset = ma_data_end(node, type, pivots, mas->max);
1744 do {
1745 child = mas_slot_locked(mas, slots, offset);
1746 mte_set_parent(child, parent, offset);
1747 } while (offset--);
1748}
1749
1750/*
1751 * mas_replace() - Replace a maple node in the tree with mas->node. Uses the
1752 * parent encoding to locate the maple node in the tree.
1753 * @mas - the ma_state to use for operations.
1754 * @advanced - boolean to adopt the child nodes and free the old node (false) or
1755 * leave the node (true) and handle the adoption and free elsewhere.
1756 */
1757static inline void mas_replace(struct ma_state *mas, bool advanced)
1758 __must_hold(mas->tree->lock)
1759{
1760 struct maple_node *mn = mas_mn(mas);
1761 struct maple_enode *old_enode;
1762 unsigned char offset = 0;
1763 void __rcu **slots = NULL;
1764
1765 if (ma_is_root(mn)) {
1766 old_enode = mas_root_locked(mas);
1767 } else {
1768 offset = mte_parent_slot(mas->node);
1769 slots = ma_slots(mte_parent(mas->node),
1770 mas_parent_enum(mas, mas->node));
1771 old_enode = mas_slot_locked(mas, slots, offset);
1772 }
1773
1774 if (!advanced && !mte_is_leaf(mas->node))
1775 mas_adopt_children(mas, mas->node);
1776
1777 if (mte_is_root(mas->node)) {
1778 mn->parent = ma_parent_ptr(
1779 ((unsigned long)mas->tree | MA_ROOT_PARENT));
1780 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1781 mas_set_height(mas);
1782 } else {
1783 rcu_assign_pointer(slots[offset], mas->node);
1784 }
1785
1786 if (!advanced) {
1787 mte_set_node_dead(old_enode);
1788 mas_free(mas, old_enode);
1789 }
1790}
1791
1792/*
1793 * mas_new_child() - Find the new child of a node.
1794 * @mas: the maple state
1795 * @child: the maple state to store the child.
1796 */
1797static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child)
1798 __must_hold(mas->tree->lock)
1799{
1800 enum maple_type mt;
1801 unsigned char offset;
1802 unsigned char end;
1803 unsigned long *pivots;
1804 struct maple_enode *entry;
1805 struct maple_node *node;
1806 void __rcu **slots;
1807
1808 mt = mte_node_type(mas->node);
1809 node = mas_mn(mas);
1810 slots = ma_slots(node, mt);
1811 pivots = ma_pivots(node, mt);
1812 end = ma_data_end(node, mt, pivots, mas->max);
1813 for (offset = mas->offset; offset <= end; offset++) {
1814 entry = mas_slot_locked(mas, slots, offset);
1815 if (mte_parent(entry) == node) {
1816 *child = *mas;
1817 mas->offset = offset + 1;
1818 child->offset = offset;
1819 mas_descend(child);
1820 child->offset = 0;
1821 return true;
1822 }
1823 }
1824 return false;
1825}
1826
1827/*
1828 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
1829 * old data or set b_node->b_end.
1830 * @b_node: the maple_big_node
1831 * @shift: the shift count
1832 */
1833static inline void mab_shift_right(struct maple_big_node *b_node,
1834 unsigned char shift)
1835{
1836 unsigned long size = b_node->b_end * sizeof(unsigned long);
1837
1838 memmove(b_node->pivot + shift, b_node->pivot, size);
1839 memmove(b_node->slot + shift, b_node->slot, size);
1840 if (b_node->type == maple_arange_64)
1841 memmove(b_node->gap + shift, b_node->gap, size);
1842}
1843
1844/*
1845 * mab_middle_node() - Check if a middle node is needed (unlikely)
1846 * @b_node: the maple_big_node that contains the data.
1847 * @size: the amount of data in the b_node
1848 * @split: the potential split location
1849 * @slot_count: the size that can be stored in a single node being considered.
1850 *
1851 * Return: true if a middle node is required.
1852 */
1853static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1854 unsigned char slot_count)
1855{
1856 unsigned char size = b_node->b_end;
1857
1858 if (size >= 2 * slot_count)
1859 return true;
1860
1861 if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1862 return true;
1863
1864 return false;
1865}
1866
1867/*
1868 * mab_no_null_split() - ensure the split doesn't fall on a NULL
1869 * @b_node: the maple_big_node with the data
1870 * @split: the suggested split location
1871 * @slot_count: the number of slots in the node being considered.
1872 *
1873 * Return: the split location.
1874 */
1875static inline int mab_no_null_split(struct maple_big_node *b_node,
1876 unsigned char split, unsigned char slot_count)
1877{
1878 if (!b_node->slot[split]) {
1879 /*
1880 * If the split is less than the max slot && the right side will
1881 * still be sufficient, then increment the split on NULL.
1882 */
1883 if ((split < slot_count - 1) &&
1884 (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1885 split++;
1886 else
1887 split--;
1888 }
1889 return split;
1890}
1891
1892/*
1893 * mab_calc_split() - Calculate the split location and if there needs to be two
1894 * splits.
1895 * @bn: The maple_big_node with the data
1896 * @mid_split: The second split, if required. 0 otherwise.
1897 *
1898 * Return: The first split location. The middle split is set in @mid_split.
1899 */
1900static inline int mab_calc_split(struct ma_state *mas,
1901 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
1902{
1903 unsigned char b_end = bn->b_end;
1904 int split = b_end / 2; /* Assume equal split. */
1905 unsigned char slot_min, slot_count = mt_slots[bn->type];
1906
1907 /*
1908 * To support gap tracking, all NULL entries are kept together and a node cannot
1909 * end on a NULL entry, with the exception of the left-most leaf. The
1910 * limitation means that the split of a node must be checked for this condition
1911 * and be able to put more data in one direction or the other.
1912 */
1913 if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
1914 *mid_split = 0;
1915 split = b_end - mt_min_slots[bn->type];
1916
1917 if (!ma_is_leaf(bn->type))
1918 return split;
1919
1920 mas->mas_flags |= MA_STATE_REBALANCE;
1921 if (!bn->slot[split])
1922 split--;
1923 return split;
1924 }
1925
1926 /*
1927 * Although extremely rare, it is possible to enter what is known as the 3-way
1928 * split scenario. The 3-way split comes about by means of a store of a range
1929 * that overwrites the end and beginning of two full nodes. The result is a set
1930 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can
1931 * also be located in different parent nodes which are also full. This can
1932 * carry upwards all the way to the root in the worst case.
1933 */
1934 if (unlikely(mab_middle_node(bn, split, slot_count))) {
1935 split = b_end / 3;
1936 *mid_split = split * 2;
1937 } else {
1938 slot_min = mt_min_slots[bn->type];
1939
1940 *mid_split = 0;
1941 /*
1942 * Avoid having a range less than the slot count unless it
1943 * causes one node to be deficient.
1944 * NOTE: mt_min_slots is 1 based, b_end and split are zero based.
1945 */
1946 while (((bn->pivot[split] - min) < slot_count - 1) &&
1947 (split < slot_count - 1) && (b_end - split > slot_min))
1948 split++;
1949 }
1950
1951 /* Avoid ending a node on a NULL entry */
1952 split = mab_no_null_split(bn, split, slot_count);
1953
1954 if (unlikely(*mid_split))
1955 *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1956
1957 return split;
1958}
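/*
 * Illustrative sketch (not part of the original source): how a caller could
 * inspect the split locations that mab_calc_split() chooses, including the
 * rare 3-way split described in the comment above.  The big node is assumed
 * to be already populated (b_end, type, pivots and slots set); the helper
 * name and the pr_debug() output are hypothetical.
 */
#if 0
static void example_split_locations(struct ma_state *mas,
				    struct maple_big_node *bn)
{
	unsigned char mid_split;
	int split;

	split = mab_calc_split(mas, bn, &mid_split, mas->min);
	if (mid_split)		/* The rare 3-way split. */
		pr_debug("3-way split at offsets %d and %u\n", split, mid_split);
	else			/* The common 2-way split. */
		pr_debug("2-way split at offset %d\n", split);
}
#endif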
1959
1960/*
1961 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1962 * and set @b_node->b_end to the next free slot.
1963 * @mas: The maple state
1964 * @mas_start: The starting slot to copy
1965 * @mas_end: The end slot to copy (inclusively)
1966 * @b_node: The maple_big_node to place the data
1967 * @mab_start: The starting location in maple_big_node to store the data.
1968 */
1969static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1970 unsigned char mas_end, struct maple_big_node *b_node,
1971 unsigned char mab_start)
1972{
1973 enum maple_type mt;
1974 struct maple_node *node;
1975 void __rcu **slots;
1976 unsigned long *pivots, *gaps;
1977 int i = mas_start, j = mab_start;
1978 unsigned char piv_end;
1979
1980 node = mas_mn(mas);
1981 mt = mte_node_type(mas->node);
1982 pivots = ma_pivots(node, mt);
1983 if (!i) {
1984 b_node->pivot[j] = pivots[i++];
1985 if (unlikely(i > mas_end))
1986 goto complete;
1987 j++;
1988 }
1989
1990 piv_end = min(mas_end, mt_pivots[mt]);
1991 for (; i < piv_end; i++, j++) {
1992 b_node->pivot[j] = pivots[i];
1993 if (unlikely(!b_node->pivot[j]))
1994 break;
1995
1996 if (unlikely(mas->max == b_node->pivot[j]))
1997 goto complete;
1998 }
1999
2000 if (likely(i <= mas_end))
2001 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
2002
2003complete:
2004 b_node->b_end = ++j;
2005 j -= mab_start;
2006 slots = ma_slots(node, mt);
2007 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
2008 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
2009 gaps = ma_gaps(node, mt);
2010 memcpy(b_node->gap + mab_start, gaps + mas_start,
2011 sizeof(unsigned long) * j);
2012 }
2013}
2014
2015/*
2016 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
2017 * @mas: The maple state
2018 * @node: The maple node
2019 * @pivots: pointer to the maple node pivots
2020 * @mt: The maple type
2021 * @end: The assumed end
2022 *
2023 * Note, end may be incremented within this function but not modified at the
2024 * source. This is fine since the metadata is the last thing to be stored in a
2025 * node during a write.
2026 */
2027static inline void mas_leaf_set_meta(struct ma_state *mas,
2028 struct maple_node *node, unsigned long *pivots,
2029 enum maple_type mt, unsigned char end)
2030{
2031 /* There is no room for metadata already */
2032 if (mt_pivots[mt] <= end)
2033 return;
2034
2035 if (pivots[end] && pivots[end] < mas->max)
2036 end++;
2037
2038 if (end < mt_slots[mt] - 1)
2039 ma_set_meta(node, mt, 0, end);
2040}
2041
2042/*
2043 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
2044 * @b_node: the maple_big_node that has the data
2045 * @mab_start: the start location in @b_node.
2046 * @mab_end: The end location in @b_node (inclusively)
2047 * @mas: The maple state with the maple encoded node.
2048 */
2049static inline void mab_mas_cp(struct maple_big_node *b_node,
2050 unsigned char mab_start, unsigned char mab_end,
2051 struct ma_state *mas, bool new_max)
2052{
2053 int i, j = 0;
2054 enum maple_type mt = mte_node_type(mas->node);
2055 struct maple_node *node = mte_to_node(mas->node);
2056 void __rcu **slots = ma_slots(node, mt);
2057 unsigned long *pivots = ma_pivots(node, mt);
2058 unsigned long *gaps = NULL;
2059 unsigned char end;
2060
2061 if (mab_end - mab_start > mt_pivots[mt])
2062 mab_end--;
2063
2064 if (!pivots[mt_pivots[mt] - 1])
2065 slots[mt_pivots[mt]] = NULL;
2066
2067 i = mab_start;
2068 do {
2069 pivots[j++] = b_node->pivot[i++];
2070 } while (i <= mab_end && likely(b_node->pivot[i]));
2071
2072 memcpy(slots, b_node->slot + mab_start,
2073 sizeof(void *) * (i - mab_start));
2074
2075 if (new_max)
2076 mas->max = b_node->pivot[i - 1];
2077
2078 end = j - 1;
2079 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
2080 unsigned long max_gap = 0;
2081 unsigned char offset = 15;
2082
2083 gaps = ma_gaps(node, mt);
2084 do {
2085 gaps[--j] = b_node->gap[--i];
2086 if (gaps[j] > max_gap) {
2087 offset = j;
2088 max_gap = gaps[j];
2089 }
2090 } while (j);
2091
2092 ma_set_meta(node, mt, offset, end);
2093 } else {
2094 mas_leaf_set_meta(mas, node, pivots, mt, end);
2095 }
2096}
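/*
 * Illustrative sketch (not part of the original source): the node-to-big-node
 * round trip that the write paths below rely on.  mas_mab_cp() gathers a
 * node's slots, pivots and gaps into a maple_big_node where they can be
 * edited as a flat array, and mab_mas_cp() writes the result back out.  The
 * helper name is hypothetical; locking and allocation are assumed to be
 * handled by the caller.
 */
#if 0
static void example_big_node_round_trip(struct ma_state *mas)
{
	struct maple_big_node bn;

	memset(&bn, 0, sizeof(bn));
	bn.type = mte_node_type(mas->node);

	/* Copy offsets 0..end of the current node into the big node. */
	mas_mab_cp(mas, 0, mas_data_end(mas), &bn, 0);

	/* ... bn.slot[], bn.pivot[] and bn.b_end may be modified here ... */

	/* Copy the big node back into the node, updating mas->max. */
	mab_mas_cp(&bn, 0, bn.b_end - 1, mas, true);
}
#endif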
2097
2098/*
2099 * mas_descend_adopt() - Descend through a sub-tree and adopt children.
2100 * @mas: the maple state with the maple encoded node of the sub-tree.
2101 *
2102 * Descend through a sub-tree and adopt children that do not have the correct
2103 * parent set.  Follow the nodes whose parent pointers are already correct, as
2104 * these are the new nodes that must be walked to find other children with
2105 * incorrectly set parents.
2106 */
2107static inline void mas_descend_adopt(struct ma_state *mas)
2108{
2109 struct ma_state list[3], next[3];
2110 int i, n;
2111
2112 /*
2113 * At each level there may be up to 3 correct parent pointers which indicates
2114 * the new nodes which need to be walked to find any new nodes at a lower level.
2115 */
2116
2117 for (i = 0; i < 3; i++) {
2118 list[i] = *mas;
2119 list[i].offset = 0;
2120 next[i].offset = 0;
2121 }
2122 next[0] = *mas;
2123
2124 while (!mte_is_leaf(list[0].node)) {
2125 n = 0;
2126 for (i = 0; i < 3; i++) {
2127 if (mas_is_none(&list[i]))
2128 continue;
2129
2130 if (i && list[i-1].node == list[i].node)
2131 continue;
2132
2133 while ((n < 3) && (mas_new_child(&list[i], &next[n])))
2134 n++;
2135
2136 mas_adopt_children(&list[i], list[i].node);
2137 }
2138
2139 while (n < 3)
2140 next[n++].node = MAS_NONE;
2141
2142 /* descend by setting the list to the children */
2143 for (i = 0; i < 3; i++)
2144 list[i] = next[i];
2145 }
2146}
2147
2148/*
2149 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
2150 * @mas: The maple state
2151 * @end: The maple node end
2152 * @mt: The maple node type
2153 */
2154static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
2155 enum maple_type mt)
2156{
2157 if (!(mas->mas_flags & MA_STATE_BULK))
2158 return;
2159
2160 if (mte_is_root(mas->node))
2161 return;
2162
2163 if (end > mt_min_slots[mt]) {
2164 mas->mas_flags &= ~MA_STATE_REBALANCE;
2165 return;
2166 }
2167}
2168
2169/*
2170 * mas_store_b_node() - Store an @entry into the b_node while also copying the
2171 * data from a maple encoded node.
2172 * @wr_mas: the maple write state
2173 * @b_node: the maple_big_node to fill with data
2174 * @offset_end: the offset to end copying
2175 *
2176 * Return: The actual end of the data stored in @b_node
2177 */
2178static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
2179 struct maple_big_node *b_node, unsigned char offset_end)
2180{
2181 unsigned char slot;
2182 unsigned char b_end;
2183 /* Possible underflow of piv will wrap back to 0 before use. */
2184 unsigned long piv;
2185 struct ma_state *mas = wr_mas->mas;
2186
2187 b_node->type = wr_mas->type;
2188 b_end = 0;
2189 slot = mas->offset;
2190 if (slot) {
2191 /* Copy start data up to insert. */
2192 mas_mab_cp(mas, 0, slot - 1, b_node, 0);
2193 b_end = b_node->b_end;
2194 piv = b_node->pivot[b_end - 1];
2195 } else
2196 piv = mas->min - 1;
2197
2198 if (piv + 1 < mas->index) {
2199 /* Handle range starting after old range */
2200 b_node->slot[b_end] = wr_mas->content;
2201 if (!wr_mas->content)
2202 b_node->gap[b_end] = mas->index - 1 - piv;
2203 b_node->pivot[b_end++] = mas->index - 1;
2204 }
2205
2206 /* Store the new entry. */
2207 mas->offset = b_end;
2208 b_node->slot[b_end] = wr_mas->entry;
2209 b_node->pivot[b_end] = mas->last;
2210
2211 /* Appended. */
2212 if (mas->last >= mas->max)
2213 goto b_end;
2214
2215 /* Handle new range ending before old range ends */
2216 piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
2217 if (piv > mas->last) {
2218 if (piv == ULONG_MAX)
2219 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
2220
2221 if (offset_end != slot)
2222 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2223 offset_end);
2224
2225 b_node->slot[++b_end] = wr_mas->content;
2226 if (!wr_mas->content)
2227 b_node->gap[b_end] = piv - mas->last + 1;
2228 b_node->pivot[b_end] = piv;
2229 }
2230
2231 slot = offset_end + 1;
2232 if (slot > wr_mas->node_end)
2233 goto b_end;
2234
2235 /* Copy end data to the end of the node. */
2236 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end);
2237 b_node->b_end--;
2238 return;
2239
2240b_end:
2241 b_node->b_end = b_end;
2242}
2243
2244/*
2245 * mas_prev_sibling() - Find the previous node with the same parent.
2246 * @mas: the maple state
2247 *
2248 * Return: True if there is a previous sibling, false otherwise.
2249 */
2250static inline bool mas_prev_sibling(struct ma_state *mas)
2251{
2252 unsigned int p_slot = mte_parent_slot(mas->node);
2253
2254 if (mte_is_root(mas->node))
2255 return false;
2256
2257 if (!p_slot)
2258 return false;
2259
2260 mas_ascend(mas);
2261 mas->offset = p_slot - 1;
2262 mas_descend(mas);
2263 return true;
2264}
2265
2266/*
2267 * mas_next_sibling() - Find the next node with the same parent.
2268 * @mas: the maple state
2269 *
2270 * Return: true if there is a next sibling, false otherwise.
2271 */
2272static inline bool mas_next_sibling(struct ma_state *mas)
2273{
2274 MA_STATE(parent, mas->tree, mas->index, mas->last);
2275
2276 if (mte_is_root(mas->node))
2277 return false;
2278
2279 parent = *mas;
2280 mas_ascend(&parent);
2281 parent.offset = mte_parent_slot(mas->node) + 1;
2282 if (parent.offset > mas_data_end(&parent))
2283 return false;
2284
2285 *mas = parent;
2286 mas_descend(mas);
2287 return true;
2288}
2289
2290/*
2291 * mte_node_or_none() - Return the encoded node or MAS_NONE.
2292 * @enode: The encoded maple node.
2293 *
2294 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state.
2295 *
2296 * Return: @enode or MAS_NONE
2297 */
2298static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
2299{
2300 if (enode)
2301 return enode;
2302
2303 return ma_enode_ptr(MAS_NONE);
2304}
2305
2306/*
2307 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
2308 * @wr_mas: The maple write state
2309 *
2310 * Uses mas_slot_locked() and does not need to worry about dead nodes.
2311 */
2312static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
2313{
2314 struct ma_state *mas = wr_mas->mas;
2315 unsigned char count;
2316 unsigned char offset;
2317 unsigned long index, min, max;
2318
2319 if (unlikely(ma_is_dense(wr_mas->type))) {
2320 wr_mas->r_max = wr_mas->r_min = mas->index;
2321 mas->offset = mas->index = mas->min;
2322 return;
2323 }
2324
2325 wr_mas->node = mas_mn(wr_mas->mas);
2326 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2327 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type,
2328 wr_mas->pivots, mas->max);
2329 offset = mas->offset;
2330 min = mas_safe_min(mas, wr_mas->pivots, offset);
2331 if (unlikely(offset == count))
2332 goto max;
2333
2334 max = wr_mas->pivots[offset];
2335 index = mas->index;
2336 if (unlikely(index <= max))
2337 goto done;
2338
2339 if (unlikely(!max && offset))
2340 goto max;
2341
2342 min = max + 1;
2343 while (++offset < count) {
2344 max = wr_mas->pivots[offset];
2345 if (index <= max)
2346 goto done;
2347 else if (unlikely(!max))
2348 break;
2349
2350 min = max + 1;
2351 }
2352
2353max:
2354 max = mas->max;
2355done:
2356 wr_mas->r_max = max;
2357 wr_mas->r_min = min;
2358 wr_mas->offset_end = mas->offset = offset;
2359}
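/*
 * Illustrative sketch (not part of the original source): the walk above is a
 * linear scan of the pivots.  With pivots {10, 20, 50} in a node whose max is
 * 100, index 25 resolves to offset 2 (the range 21-50) and index 60 resolves
 * to offset 3, whose upper bound is the node's max.  The helper below is
 * hypothetical and ignores the dense node and metadata cases.
 */
#if 0
static unsigned char example_offset_for_index(const unsigned long *pivots,
					      unsigned char count,
					      unsigned long index)
{
	unsigned char offset = 0;

	while (offset < count && pivots[offset] < index)
		offset++;

	/* offset == count means the last slot, bounded by the node's max. */
	return offset;
}
#endif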
2360
2361/*
2362 * mas_topiary_range() - Add a range of slots to the topiary.
2363 * @mas: The maple state
2364 * @destroy: The topiary to add the slots (usually destroy)
2365 * @start: The starting slot (inclusive)
2366 * @end: The end slot (inclusive)
2367 */
2368static inline void mas_topiary_range(struct ma_state *mas,
2369 struct ma_topiary *destroy, unsigned char start, unsigned char end)
2370{
2371 void __rcu **slots;
2372 unsigned char offset;
2373
2374 MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
2375 slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
2376 for (offset = start; offset <= end; offset++) {
2377 struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
2378
2379 if (mte_dead_node(enode))
2380 continue;
2381
2382 mat_add(destroy, enode);
2383 }
2384}
2385
2386/*
2387 * mast_topiary() - Add the portions of the tree to the removal list; either to
2388 * be freed or discarded (destroy walk).
2389 * @mast: The maple_subtree_state.
2390 */
2391static inline void mast_topiary(struct maple_subtree_state *mast)
2392{
2393 MA_WR_STATE(wr_mas, mast->orig_l, NULL);
2394 unsigned char r_start, r_end;
2395 unsigned char l_start, l_end;
2396 void __rcu **l_slots, **r_slots;
2397
2398 wr_mas.type = mte_node_type(mast->orig_l->node);
2399 mast->orig_l->index = mast->orig_l->last;
2400 mas_wr_node_walk(&wr_mas);
2401 l_start = mast->orig_l->offset + 1;
2402 l_end = mas_data_end(mast->orig_l);
2403 r_start = 0;
2404 r_end = mast->orig_r->offset;
2405
2406 if (r_end)
2407 r_end--;
2408
2409 l_slots = ma_slots(mas_mn(mast->orig_l),
2410 mte_node_type(mast->orig_l->node));
2411
2412 r_slots = ma_slots(mas_mn(mast->orig_r),
2413 mte_node_type(mast->orig_r->node));
2414
2415 if ((l_start < l_end) &&
2416 mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
2417 l_start++;
2418 }
2419
2420 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
2421 if (r_end)
2422 r_end--;
2423 }
2424
2425 if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
2426 return;
2427
2428 /* At the node where left and right sides meet, add the parts between */
2429 if (mast->orig_l->node == mast->orig_r->node) {
2430 return mas_topiary_range(mast->orig_l, mast->destroy,
2431 l_start, r_end);
2432 }
2433
2434 /* mast->orig_r is different and consumed. */
2435 if (mte_is_leaf(mast->orig_r->node))
2436 return;
2437
2438 if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
2439 l_end--;
2440
2441
2442 if (l_start <= l_end)
2443 mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
2444
2445 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
2446 r_start++;
2447
2448 if (r_start <= r_end)
2449 mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
2450}
2451
2452/*
2453 * mast_rebalance_next() - Rebalance against the next node
2454 * @mast: The maple subtree state
2455 * The data from the node to the right (the next node) is appended to @mast->bn.
2456 */
2457static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2458{
2459 unsigned char b_end = mast->bn->b_end;
2460
2461 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2462 mast->bn, b_end);
2463 mast->orig_r->last = mast->orig_r->max;
2464}
2465
2466/*
2467 * mast_rebalance_prev() - Rebalance against the previous node
2468 * @mast: The maple subtree state
2469 * The data from the node to the left (the previous node) is prepended to @mast->bn.
2470 */
2471static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2472{
2473 unsigned char end = mas_data_end(mast->orig_l) + 1;
2474 unsigned char b_end = mast->bn->b_end;
2475
2476 mab_shift_right(mast->bn, end);
2477 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2478 mast->l->min = mast->orig_l->min;
2479 mast->orig_l->index = mast->orig_l->min;
2480 mast->bn->b_end = end + b_end;
2481 mast->l->offset += end;
2482}
2483
2484/*
2485 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
2486 * the node to the right.  Check the nodes to the right, then the left, at each
2487 * level upwards until the root is reached.  Free and destroy as needed.
2488 * Data is copied into the @mast->bn.
2489 * @mast: The maple_subtree_state.
2490 */
2491static inline
2492bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2493{
2494 struct ma_state r_tmp = *mast->orig_r;
2495 struct ma_state l_tmp = *mast->orig_l;
2496 struct maple_enode *ancestor = NULL;
2497 unsigned char start, end;
2498 unsigned char depth = 0;
2499
2500 r_tmp = *mast->orig_r;
2501 l_tmp = *mast->orig_l;
2502 do {
2503 mas_ascend(mast->orig_r);
2504 mas_ascend(mast->orig_l);
2505 depth++;
2506 if (!ancestor &&
2507 (mast->orig_r->node == mast->orig_l->node)) {
2508 ancestor = mast->orig_r->node;
2509 end = mast->orig_r->offset - 1;
2510 start = mast->orig_l->offset + 1;
2511 }
2512
2513 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2514 if (!ancestor) {
2515 ancestor = mast->orig_r->node;
2516 start = 0;
2517 }
2518
2519 mast->orig_r->offset++;
2520 do {
2521 mas_descend(mast->orig_r);
2522 mast->orig_r->offset = 0;
2523 depth--;
2524 } while (depth);
2525
2526 mast_rebalance_next(mast);
2527 do {
2528 unsigned char l_off = 0;
2529 struct maple_enode *child = r_tmp.node;
2530
2531 mas_ascend(&r_tmp);
2532 if (ancestor == r_tmp.node)
2533 l_off = start;
2534
2535 if (r_tmp.offset)
2536 r_tmp.offset--;
2537
2538 if (l_off < r_tmp.offset)
2539 mas_topiary_range(&r_tmp, mast->destroy,
2540 l_off, r_tmp.offset);
2541
2542 if (l_tmp.node != child)
2543 mat_add(mast->free, child);
2544
2545 } while (r_tmp.node != ancestor);
2546
2547 *mast->orig_l = l_tmp;
2548 return true;
2549
2550 } else if (mast->orig_l->offset != 0) {
2551 if (!ancestor) {
2552 ancestor = mast->orig_l->node;
2553 end = mas_data_end(mast->orig_l);
2554 }
2555
2556 mast->orig_l->offset--;
2557 do {
2558 mas_descend(mast->orig_l);
2559 mast->orig_l->offset =
2560 mas_data_end(mast->orig_l);
2561 depth--;
2562 } while (depth);
2563
2564 mast_rebalance_prev(mast);
2565 do {
2566 unsigned char r_off;
2567 struct maple_enode *child = l_tmp.node;
2568
2569 mas_ascend(&l_tmp);
2570 if (ancestor == l_tmp.node)
2571 r_off = end;
2572 else
2573 r_off = mas_data_end(&l_tmp);
2574
2575 if (l_tmp.offset < r_off)
2576 l_tmp.offset++;
2577
2578 if (l_tmp.offset < r_off)
2579 mas_topiary_range(&l_tmp, mast->destroy,
2580 l_tmp.offset, r_off);
2581
2582 if (r_tmp.node != child)
2583 mat_add(mast->free, child);
2584
2585 } while (l_tmp.node != ancestor);
2586
2587 *mast->orig_r = r_tmp;
2588 return true;
2589 }
2590 } while (!mte_is_root(mast->orig_r->node));
2591
2592 *mast->orig_r = r_tmp;
2593 *mast->orig_l = l_tmp;
2594 return false;
2595}
2596
2597/*
2598 * mast_ascend_free() - Add current original maple state nodes to the free list
2599 * and ascend.
2600 * @mast: the maple subtree state.
2601 *
2602 * Ascend the original left and right sides and add the previous nodes to the
2603 * free list. Set the slots to point to the correct location in the new nodes.
2604 */
2605static inline void
2606mast_ascend_free(struct maple_subtree_state *mast)
2607{
2608 MA_WR_STATE(wr_mas, mast->orig_r, NULL);
2609 struct maple_enode *left = mast->orig_l->node;
2610 struct maple_enode *right = mast->orig_r->node;
2611
2612 mas_ascend(mast->orig_l);
2613 mas_ascend(mast->orig_r);
2614 mat_add(mast->free, left);
2615
2616 if (left != right)
2617 mat_add(mast->free, right);
2618
2619 mast->orig_r->offset = 0;
2620 mast->orig_r->index = mast->r->max;
2621 /* last should be larger than or equal to index */
2622 if (mast->orig_r->last < mast->orig_r->index)
2623 mast->orig_r->last = mast->orig_r->index;
2624 /*
2625 * The node may not contain the value so set slot to ensure all
2626	 * of the node's contents are freed or destroyed.
2627 */
2628 wr_mas.type = mte_node_type(mast->orig_r->node);
2629 mas_wr_node_walk(&wr_mas);
2630 /* Set up the left side of things */
2631 mast->orig_l->offset = 0;
2632 mast->orig_l->index = mast->l->min;
2633 wr_mas.mas = mast->orig_l;
2634 wr_mas.type = mte_node_type(mast->orig_l->node);
2635 mas_wr_node_walk(&wr_mas);
2636
2637 mast->bn->type = wr_mas.type;
2638}
2639
2640/*
2641 * mas_new_ma_node() - Create and return a new maple node. Helper function.
2642 * @mas: the maple state with the allocations.
2643 * @b_node: the maple_big_node with the type encoding.
2644 *
2645 * Use the node type from the maple_big_node to allocate a new node from the
2646 * ma_state. This function exists mainly for code readability.
2647 *
2648 * Return: A new maple encoded node
2649 */
2650static inline struct maple_enode
2651*mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2652{
2653 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2654}
2655
2656/*
2657 * mas_mab_to_node() - Set up the left, right and middle nodes
2658 *
2659 * @mas: the maple state that contains the allocations.
2660 * @b_node: the node which contains the data.
2661 * @left: The pointer which will have the left node
2662 * @right: The pointer which may have the right node
2663 * @middle: the pointer which may have the middle node (rare)
2664 * @mid_split: the split location for the middle node
2665 *
2666 * Return: the split of left.
2667 */
2668static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2669 struct maple_big_node *b_node, struct maple_enode **left,
2670 struct maple_enode **right, struct maple_enode **middle,
2671 unsigned char *mid_split, unsigned long min)
2672{
2673 unsigned char split = 0;
2674 unsigned char slot_count = mt_slots[b_node->type];
2675
2676 *left = mas_new_ma_node(mas, b_node);
2677 *right = NULL;
2678 *middle = NULL;
2679 *mid_split = 0;
2680
2681 if (b_node->b_end < slot_count) {
2682 split = b_node->b_end;
2683 } else {
2684 split = mab_calc_split(mas, b_node, mid_split, min);
2685 *right = mas_new_ma_node(mas, b_node);
2686 }
2687
2688 if (*mid_split)
2689 *middle = mas_new_ma_node(mas, b_node);
2690
2691 return split;
2692
2693}
2694
2695/*
2696 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
2697 * pointer.
2698 * @b_node - the big node to add the entry
2699 * @mas - the maple state to get the pivot (mas->max)
2700 * @entry - the entry to add, if NULL nothing happens.
2701 */
2702static inline void mab_set_b_end(struct maple_big_node *b_node,
2703 struct ma_state *mas,
2704 void *entry)
2705{
2706 if (!entry)
2707 return;
2708
2709 b_node->slot[b_node->b_end] = entry;
2710 if (mt_is_alloc(mas->tree))
2711 b_node->gap[b_node->b_end] = mas_max_gap(mas);
2712 b_node->pivot[b_node->b_end++] = mas->max;
2713}
2714
2715/*
2716 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent
2717 * of @mas->node to either @left or @right, depending on @slot and @split
2718 *
2719 * @mas - the maple state with the node that needs a parent
2720 * @left - possible parent 1
2721 * @right - possible parent 2
2722 * @slot - the slot the mas->node was placed
2723 * @split - the split location between @left and @right
2724 */
2725static inline void mas_set_split_parent(struct ma_state *mas,
2726 struct maple_enode *left,
2727 struct maple_enode *right,
2728 unsigned char *slot, unsigned char split)
2729{
2730 if (mas_is_none(mas))
2731 return;
2732
2733 if ((*slot) <= split)
2734 mte_set_parent(mas->node, left, *slot);
2735 else if (right)
2736 mte_set_parent(mas->node, right, (*slot) - split - 1);
2737
2738 (*slot)++;
2739}
2740
2741/*
2742 * mte_mid_split_check() - Check if the next node passes the mid-split
2743 * @l: Pointer to the left encoded maple node.
2744 * @r: Pointer to the right encoded maple node (may be advanced to @right).
2745 * @right: The encoded maple node to the right of the mid split.
2746 * @slot: The offset
2747 * @split: The split location.
2748 * @mid_split: The middle split.
2749 */
2750static inline void mte_mid_split_check(struct maple_enode **l,
2751 struct maple_enode **r,
2752 struct maple_enode *right,
2753 unsigned char slot,
2754 unsigned char *split,
2755 unsigned char mid_split)
2756{
2757 if (*r == right)
2758 return;
2759
2760 if (slot < mid_split)
2761 return;
2762
2763 *l = *r;
2764 *r = right;
2765 *split = mid_split;
2766}
2767
2768/*
2769 * mast_set_split_parents() - Helper function to set three nodes parents. Slot
2770 * is taken from @mast->l.
2771 * @mast - the maple subtree state
2772 * @left - the left node
2773 * @right - the right node
2774 * @split - the split location.
2775 */
2776static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2777 struct maple_enode *left,
2778 struct maple_enode *middle,
2779 struct maple_enode *right,
2780 unsigned char split,
2781 unsigned char mid_split)
2782{
2783 unsigned char slot;
2784 struct maple_enode *l = left;
2785 struct maple_enode *r = right;
2786
2787 if (mas_is_none(mast->l))
2788 return;
2789
2790 if (middle)
2791 r = middle;
2792
2793 slot = mast->l->offset;
2794
2795 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2796 mas_set_split_parent(mast->l, l, r, &slot, split);
2797
2798 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2799 mas_set_split_parent(mast->m, l, r, &slot, split);
2800
2801 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2802 mas_set_split_parent(mast->r, l, r, &slot, split);
2803}
2804
2805/*
2806 * mas_wmb_replace() - Write memory barrier and replace
2807 * @mas: The maple state
2808 * @free: the maple topiary list of nodes to free
2809 * @destroy: The maple topiary list of nodes to destroy (walk and free)
2810 *
2811 * Updates gap as necessary.
2812 */
2813static inline void mas_wmb_replace(struct ma_state *mas,
2814 struct ma_topiary *free,
2815 struct ma_topiary *destroy)
2816{
2817 /* All nodes must see old data as dead prior to replacing that data */
2818 smp_wmb(); /* Needed for RCU */
2819
2820 /* Insert the new data in the tree */
2821 mas_replace(mas, true);
2822
2823 if (!mte_is_leaf(mas->node))
2824 mas_descend_adopt(mas);
2825
2826 mas_mat_free(mas, free);
2827
2828 if (destroy)
2829 mas_mat_destroy(mas, destroy);
2830
2831 if (mte_is_leaf(mas->node))
2832 return;
2833
2834 mas_update_gap(mas);
2835}
2836
2837/*
2838 * mast_new_root() - Set a new tree root during subtree creation
2839 * @mast: The maple subtree state
2840 * @mas: The maple state
2841 */
2842static inline void mast_new_root(struct maple_subtree_state *mast,
2843 struct ma_state *mas)
2844{
2845 mas_mn(mast->l)->parent =
2846 ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT));
2847 if (!mte_dead_node(mast->orig_l->node) &&
2848 !mte_is_root(mast->orig_l->node)) {
2849 do {
2850 mast_ascend_free(mast);
2851 mast_topiary(mast);
2852 } while (!mte_is_root(mast->orig_l->node));
2853 }
2854 if ((mast->orig_l->node != mas->node) &&
2855 (mast->l->depth > mas_mt_height(mas))) {
2856 mat_add(mast->free, mas->node);
2857 }
2858}
2859
2860/*
2861 * mast_cp_to_nodes() - Copy data out to nodes.
2862 * @mast: The maple subtree state
2863 * @left: The left encoded maple node
2864 * @middle: The middle encoded maple node
2865 * @right: The right encoded maple node
2866 * @split: The location to split between left and (middle ? middle : right)
2867 * @mid_split: The location to split between middle and right.
2868 */
2869static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2870 struct maple_enode *left, struct maple_enode *middle,
2871 struct maple_enode *right, unsigned char split, unsigned char mid_split)
2872{
2873 bool new_lmax = true;
2874
2875 mast->l->node = mte_node_or_none(left);
2876 mast->m->node = mte_node_or_none(middle);
2877 mast->r->node = mte_node_or_none(right);
2878
2879 mast->l->min = mast->orig_l->min;
2880 if (split == mast->bn->b_end) {
2881 mast->l->max = mast->orig_r->max;
2882 new_lmax = false;
2883 }
2884
2885 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2886
2887 if (middle) {
2888 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2889 mast->m->min = mast->bn->pivot[split] + 1;
2890 split = mid_split;
2891 }
2892
2893 mast->r->max = mast->orig_r->max;
2894 if (right) {
2895 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2896 mast->r->min = mast->bn->pivot[split] + 1;
2897 }
2898}
2899
2900/*
2901 * mast_combine_cp_left - Copy in the original left side of the tree into the
2902 * combined data set in the maple subtree state big node.
2903 * @mast: The maple subtree state
2904 */
2905static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2906{
2907 unsigned char l_slot = mast->orig_l->offset;
2908
2909 if (!l_slot)
2910 return;
2911
2912 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2913}
2914
2915/*
2916 * mast_combine_cp_right: Copy in the original right side of the tree into the
2917 * combined data set in the maple subtree state big node.
2918 * @mast: The maple subtree state
2919 */
2920static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2921{
2922 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2923 return;
2924
2925 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2926 mt_slot_count(mast->orig_r->node), mast->bn,
2927 mast->bn->b_end);
2928 mast->orig_r->last = mast->orig_r->max;
2929}
2930
2931/*
2932 * mast_sufficient: Check if the maple subtree state has enough data in the big
2933 * node to create at least one sufficient node
2934 * @mast: the maple subtree state
2935 */
2936static inline bool mast_sufficient(struct maple_subtree_state *mast)
2937{
2938 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2939 return true;
2940
2941 return false;
2942}
2943
2944/*
2945 * mast_overflow: Check if there is too much data in the subtree state for a
2946 * single node.
2947 * @mast: The maple subtree state
2948 */
2949static inline bool mast_overflow(struct maple_subtree_state *mast)
2950{
2951 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
2952 return true;
2953
2954 return false;
2955}
2956
2957static inline void *mtree_range_walk(struct ma_state *mas)
2958{
2959 unsigned long *pivots;
2960 unsigned char offset;
2961 struct maple_node *node;
2962 struct maple_enode *next, *last;
2963 enum maple_type type;
2964 void __rcu **slots;
2965 unsigned char end;
2966 unsigned long max, min;
2967 unsigned long prev_max, prev_min;
2968
2969 next = mas->node;
2970 min = mas->min;
2971 max = mas->max;
2972 do {
2973 offset = 0;
2974 last = next;
2975 node = mte_to_node(next);
2976 type = mte_node_type(next);
2977 pivots = ma_pivots(node, type);
2978 end = ma_data_end(node, type, pivots, max);
2979 if (unlikely(ma_dead_node(node)))
2980 goto dead_node;
2981
2982 if (pivots[offset] >= mas->index) {
2983 prev_max = max;
2984 prev_min = min;
2985 max = pivots[offset];
2986 goto next;
2987 }
2988
2989 do {
2990 offset++;
2991 } while ((offset < end) && (pivots[offset] < mas->index));
2992
2993 prev_min = min;
2994 min = pivots[offset - 1] + 1;
2995 prev_max = max;
2996 if (likely(offset < end && pivots[offset]))
2997 max = pivots[offset];
2998
2999next:
3000 slots = ma_slots(node, type);
3001 next = mt_slot(mas->tree, slots, offset);
3002 if (unlikely(ma_dead_node(node)))
3003 goto dead_node;
3004 } while (!ma_is_leaf(type));
3005
3006 mas->offset = offset;
3007 mas->index = min;
3008 mas->last = max;
3009 mas->min = prev_min;
3010 mas->max = prev_max;
3011 mas->node = last;
3012	return (void *)next;
3013
3014dead_node:
3015 mas_reset(mas);
3016 return NULL;
3017}
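/*
 * Illustrative sketch (not part of the original source): the user-visible
 * effect of the range walk above is that a stored range is returned for any
 * index it covers.  The tree name, range and value below are hypothetical.
 */
#if 0
static DEFINE_MTREE(example_tree);

static void example_range_walk(void)
{
	static int data;

	mtree_store_range(&example_tree, 100, 199, &data, GFP_KERNEL);

	/* Both lookups walk down to the same leaf slot covering 100-199. */
	WARN_ON(mtree_load(&example_tree, 100) != &data);
	WARN_ON(mtree_load(&example_tree, 150) != &data);
	WARN_ON(mtree_load(&example_tree, 200) != NULL);

	mtree_destroy(&example_tree);
}
#endif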
3018
3019/*
3020 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
3021 * @mas: The starting maple state
3022 * @mast: The maple_subtree_state, keeps track of 4 maple states.
3023 * @count: The estimated count of iterations needed.
3024 *
3025 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
3026 * is hit. First @b_node is split into two entries which are inserted into the
3027 * next iteration of the loop. @b_node is returned populated with the final
3028 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the
3029 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
3030 * to account for what has been copied into the new sub-tree. The update of
3031 * orig_l_mas->last is used in mas_consume to find the slots that will need to
3032 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
3033 * the new sub-tree in case the sub-tree becomes the full tree.
3034 *
3035 * Return: the number of elements in b_node during the last loop.
3036 */
3037static int mas_spanning_rebalance(struct ma_state *mas,
3038 struct maple_subtree_state *mast, unsigned char count)
3039{
3040 unsigned char split, mid_split;
3041 unsigned char slot = 0;
3042 struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
3043
3044 MA_STATE(l_mas, mas->tree, mas->index, mas->index);
3045 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3046 MA_STATE(m_mas, mas->tree, mas->index, mas->index);
3047 MA_TOPIARY(free, mas->tree);
3048 MA_TOPIARY(destroy, mas->tree);
3049
3050 /*
3051 * The tree needs to be rebalanced and leaves need to be kept at the same level.
3052	 * Rebalancing is done by use of the ``struct ma_topiary``.
3053 */
3054 mast->l = &l_mas;
3055 mast->m = &m_mas;
3056 mast->r = &r_mas;
3057 mast->free = &free;
3058 mast->destroy = &destroy;
3059 l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
3060
3061 /* Check if this is not root and has sufficient data. */
3062 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
3063 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
3064 mast_spanning_rebalance(mast);
3065
3066 mast->orig_l->depth = 0;
3067
3068 /*
3069 * Each level of the tree is examined and balanced, pushing data to the left or
3070 * right, or rebalancing against left or right nodes is employed to avoid
3071 * rippling up the tree to limit the amount of churn. Once a new sub-section of
3072 * the tree is created, there may be a mix of new and old nodes. The old nodes
3073 * will have the incorrect parent pointers and currently be in two trees: the
3074 * original tree and the partially new tree. To remedy the parent pointers in
3075 * the old tree, the new data is swapped into the active tree and a walk down
3076 * the tree is performed and the parent pointers are updated.
3077	 * See mas_descend_adopt() for more information.
3078 */
3079 while (count--) {
3080 mast->bn->b_end--;
3081 mast->bn->type = mte_node_type(mast->orig_l->node);
3082 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
3083 &mid_split, mast->orig_l->min);
3084 mast_set_split_parents(mast, left, middle, right, split,
3085 mid_split);
3086 mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
3087
3088 /*
3089 * Copy data from next level in the tree to mast->bn from next
3090 * iteration
3091 */
3092 memset(mast->bn, 0, sizeof(struct maple_big_node));
3093 mast->bn->type = mte_node_type(left);
3094 mast->orig_l->depth++;
3095
3096 /* Root already stored in l->node. */
3097 if (mas_is_root_limits(mast->l))
3098 goto new_root;
3099
3100 mast_ascend_free(mast);
3101 mast_combine_cp_left(mast);
3102 l_mas.offset = mast->bn->b_end;
3103 mab_set_b_end(mast->bn, &l_mas, left);
3104 mab_set_b_end(mast->bn, &m_mas, middle);
3105 mab_set_b_end(mast->bn, &r_mas, right);
3106
3107 /* Copy anything necessary out of the right node. */
3108 mast_combine_cp_right(mast);
3109 mast_topiary(mast);
3110 mast->orig_l->last = mast->orig_l->max;
3111
3112 if (mast_sufficient(mast))
3113 continue;
3114
3115 if (mast_overflow(mast))
3116 continue;
3117
3118 /* May be a new root stored in mast->bn */
3119 if (mas_is_root_limits(mast->orig_l))
3120 break;
3121
3122 mast_spanning_rebalance(mast);
3123
3124 /* rebalancing from other nodes may require another loop. */
3125 if (!count)
3126 count++;
3127 }
3128
3129 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
3130 mte_node_type(mast->orig_l->node));
3131 mast->orig_l->depth++;
3132 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
3133 mte_set_parent(left, l_mas.node, slot);
3134 if (middle)
3135 mte_set_parent(middle, l_mas.node, ++slot);
3136
3137 if (right)
3138 mte_set_parent(right, l_mas.node, ++slot);
3139
3140 if (mas_is_root_limits(mast->l)) {
3141new_root:
3142 mast_new_root(mast, mas);
3143 } else {
3144 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
3145 }
3146
3147 if (!mte_dead_node(mast->orig_l->node))
3148 mat_add(&free, mast->orig_l->node);
3149
3150 mas->depth = mast->orig_l->depth;
3151 *mast->orig_l = l_mas;
3152 mte_set_node_dead(mas->node);
3153
3154 /* Set up mas for insertion. */
3155 mast->orig_l->depth = mas->depth;
3156 mast->orig_l->alloc = mas->alloc;
3157 *mas = *mast->orig_l;
3158 mas_wmb_replace(mas, &free, &destroy);
3159 mtree_range_walk(mas);
3160 return mast->bn->b_end;
3161}
3162
3163/*
3164 * mas_rebalance() - Rebalance a given node.
3165 * @mas: The maple state
3166 * @b_node: The big maple node.
3167 *
3168 * Rebalance two nodes into a single node or two new nodes that are sufficient.
3169 * Continue upwards until tree is sufficient.
3170 *
3171 * Return: the number of elements in b_node during the last loop.
3172 */
3173static inline int mas_rebalance(struct ma_state *mas,
3174 struct maple_big_node *b_node)
3175{
3176 char empty_count = mas_mt_height(mas);
3177 struct maple_subtree_state mast;
3178 unsigned char shift, b_end = ++b_node->b_end;
3179
3180 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3181 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3182
3183 trace_ma_op(__func__, mas);
3184
3185 /*
3186 * Rebalancing occurs if a node is insufficient. Data is rebalanced
3187 * against the node to the right if it exists, otherwise the node to the
3188 * left of this node is rebalanced against this node. If rebalancing
3189 * causes just one node to be produced instead of two, then the parent
3190 * is also examined and rebalanced if it is insufficient. Every level
3191 * tries to combine the data in the same way. If one node contains the
3192 * entire range of the tree, then that node is used as a new root node.
3193 */
3194 mas_node_count(mas, 1 + empty_count * 3);
3195 if (mas_is_err(mas))
3196 return 0;
3197
3198 mast.orig_l = &l_mas;
3199 mast.orig_r = &r_mas;
3200 mast.bn = b_node;
3201 mast.bn->type = mte_node_type(mas->node);
3202
3203 l_mas = r_mas = *mas;
3204
3205 if (mas_next_sibling(&r_mas)) {
3206 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
3207 r_mas.last = r_mas.index = r_mas.max;
3208 } else {
3209 mas_prev_sibling(&l_mas);
3210 shift = mas_data_end(&l_mas) + 1;
3211 mab_shift_right(b_node, shift);
3212 mas->offset += shift;
3213 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
3214 b_node->b_end = shift + b_end;
3215 l_mas.index = l_mas.last = l_mas.min;
3216 }
3217
3218 return mas_spanning_rebalance(mas, &mast, empty_count);
3219}
3220
3221/*
3222 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
3223 * state.
3224 * @mas: The maple state
3225 * @end: The end of the left-most node.
3226 *
3227 * During a mass-insert event (such as forking), it may be necessary to
3228 * rebalance the left-most node when it is not sufficient.
3229 */
3230static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
3231{
3232 enum maple_type mt = mte_node_type(mas->node);
3233 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
3234 struct maple_enode *eparent;
3235 unsigned char offset, tmp, split = mt_slots[mt] / 2;
3236 void __rcu **l_slots, **slots;
3237 unsigned long *l_pivs, *pivs, gap;
3238 bool in_rcu = mt_in_rcu(mas->tree);
3239
3240 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3241
3242 l_mas = *mas;
3243 mas_prev_sibling(&l_mas);
3244
3245 /* set up node. */
3246 if (in_rcu) {
3247 /* Allocate for both left and right as well as parent. */
3248 mas_node_count(mas, 3);
3249 if (mas_is_err(mas))
3250 return;
3251
3252 newnode = mas_pop_node(mas);
3253 } else {
3254 newnode = &reuse;
3255 }
3256
3257 node = mas_mn(mas);
3258 newnode->parent = node->parent;
3259 slots = ma_slots(newnode, mt);
3260 pivs = ma_pivots(newnode, mt);
3261 left = mas_mn(&l_mas);
3262 l_slots = ma_slots(left, mt);
3263 l_pivs = ma_pivots(left, mt);
3264 if (!l_slots[split])
3265 split++;
3266 tmp = mas_data_end(&l_mas) - split;
3267
3268 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
3269 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
3270 pivs[tmp] = l_mas.max;
3271 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
3272 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
3273
3274 l_mas.max = l_pivs[split];
3275 mas->min = l_mas.max + 1;
3276 eparent = mt_mk_node(mte_parent(l_mas.node),
3277 mas_parent_enum(&l_mas, l_mas.node));
3278 tmp += end;
3279 if (!in_rcu) {
3280 unsigned char max_p = mt_pivots[mt];
3281 unsigned char max_s = mt_slots[mt];
3282
3283 if (tmp < max_p)
3284 memset(pivs + tmp, 0,
3285 sizeof(unsigned long *) * (max_p - tmp));
3286
3287 if (tmp < mt_slots[mt])
3288 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3289
3290 memcpy(node, newnode, sizeof(struct maple_node));
3291 ma_set_meta(node, mt, 0, tmp - 1);
3292 mte_set_pivot(eparent, mte_parent_slot(l_mas.node),
3293 l_pivs[split]);
3294
3295 /* Remove data from l_pivs. */
3296 tmp = split + 1;
3297 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
3298 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3299 ma_set_meta(left, mt, 0, split);
3300
3301 goto done;
3302 }
3303
3304 /* RCU requires replacing both l_mas, mas, and parent. */
3305 mas->node = mt_mk_node(newnode, mt);
3306 ma_set_meta(newnode, mt, 0, tmp);
3307
3308 new_left = mas_pop_node(mas);
3309 new_left->parent = left->parent;
3310 mt = mte_node_type(l_mas.node);
3311 slots = ma_slots(new_left, mt);
3312 pivs = ma_pivots(new_left, mt);
3313 memcpy(slots, l_slots, sizeof(void *) * split);
3314 memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
3315 ma_set_meta(new_left, mt, 0, split);
3316 l_mas.node = mt_mk_node(new_left, mt);
3317
3318 /* replace parent. */
3319 offset = mte_parent_slot(mas->node);
3320 mt = mas_parent_enum(&l_mas, l_mas.node);
3321 parent = mas_pop_node(mas);
3322 slots = ma_slots(parent, mt);
3323 pivs = ma_pivots(parent, mt);
3324 memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node));
3325 rcu_assign_pointer(slots[offset], mas->node);
3326 rcu_assign_pointer(slots[offset - 1], l_mas.node);
3327 pivs[offset - 1] = l_mas.max;
3328 eparent = mt_mk_node(parent, mt);
3329done:
3330 gap = mas_leaf_max_gap(mas);
3331 mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
3332 gap = mas_leaf_max_gap(&l_mas);
3333 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
3334 mas_ascend(mas);
3335
3336 if (in_rcu)
3337 mas_replace(mas, false);
3338
3339 mas_update_gap(mas);
3340}
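/*
 * Illustrative sketch (not part of the original source): the bulk-insert mode
 * that can leave a deficient left-most node is entered through
 * mas_expected_entries() and finished by mas_destroy(), which is where
 * mas_destroy_rebalance() above is reached.  The ranges and entry values are
 * hypothetical and the tree lock is assumed to be held by the caller.
 */
#if 0
static void example_bulk_insert(struct maple_tree *mt, unsigned long nr)
{
	unsigned long i;

	MA_STATE(mas, mt, 0, 0);

	if (mas_expected_entries(&mas, nr))	/* Preallocate for nr stores. */
		return;

	for (i = 0; i < nr; i++) {
		mas_set_range(&mas, i * 10, i * 10 + 9);
		mas_store(&mas, xa_mk_value(i));
	}

	mas_destroy(&mas);	/* May rebalance the left-most leaf. */
}
#endif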
3341
3342/*
3343 * mas_split_final_node() - Split the final node in a subtree operation.
3344 * @mast: the maple subtree state
3345 * @mas: The maple state
3346 * @height: The height of the tree in case it's a new root.
3347 */
3348static inline bool mas_split_final_node(struct maple_subtree_state *mast,
3349 struct ma_state *mas, int height)
3350{
3351 struct maple_enode *ancestor;
3352
3353 if (mte_is_root(mas->node)) {
3354 if (mt_is_alloc(mas->tree))
3355 mast->bn->type = maple_arange_64;
3356 else
3357 mast->bn->type = maple_range_64;
3358 mas->depth = height;
3359 }
3360 /*
3361 * Only a single node is used here, could be root.
3362 * The Big_node data should just fit in a single node.
3363 */
3364 ancestor = mas_new_ma_node(mas, mast->bn);
3365 mte_set_parent(mast->l->node, ancestor, mast->l->offset);
3366 mte_set_parent(mast->r->node, ancestor, mast->r->offset);
3367 mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
3368
3369 mast->l->node = ancestor;
3370 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
3371 mas->offset = mast->bn->b_end - 1;
3372 return true;
3373}
3374
3375/*
3376 * mast_fill_bnode() - Copy data into the big node in the subtree state
3377 * @mast: The maple subtree state
3378 * @mas: the maple state
3379 * @skip: The number of entries to skip for new nodes insertion.
3380 */
3381static inline void mast_fill_bnode(struct maple_subtree_state *mast,
3382 struct ma_state *mas,
3383 unsigned char skip)
3384{
3385 bool cp = true;
3386 struct maple_enode *old = mas->node;
3387 unsigned char split;
3388
3389 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
3390 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
3391 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
3392 mast->bn->b_end = 0;
3393
3394 if (mte_is_root(mas->node)) {
3395 cp = false;
3396 } else {
3397 mas_ascend(mas);
3398 mat_add(mast->free, old);
3399 mas->offset = mte_parent_slot(mas->node);
3400 }
3401
3402 if (cp && mast->l->offset)
3403 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
3404
3405 split = mast->bn->b_end;
3406 mab_set_b_end(mast->bn, mast->l, mast->l->node);
3407 mast->r->offset = mast->bn->b_end;
3408 mab_set_b_end(mast->bn, mast->r, mast->r->node);
3409 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
3410 cp = false;
3411
3412 if (cp)
3413 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
3414 mast->bn, mast->bn->b_end);
3415
3416 mast->bn->b_end--;
3417 mast->bn->type = mte_node_type(mas->node);
3418}
3419
3420/*
3421 * mast_split_data() - Split the data in the subtree state big node into regular
3422 * nodes.
3423 * @mast: The maple subtree state
3424 * @mas: The maple state
3425 * @split: The location to split the big node
3426 */
3427static inline void mast_split_data(struct maple_subtree_state *mast,
3428 struct ma_state *mas, unsigned char split)
3429{
3430 unsigned char p_slot;
3431
3432 mab_mas_cp(mast->bn, 0, split, mast->l, true);
3433 mte_set_pivot(mast->r->node, 0, mast->r->max);
3434 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
3435 mast->l->offset = mte_parent_slot(mas->node);
3436 mast->l->max = mast->bn->pivot[split];
3437 mast->r->min = mast->l->max + 1;
3438 if (mte_is_leaf(mas->node))
3439 return;
3440
3441 p_slot = mast->orig_l->offset;
3442 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
3443 &p_slot, split);
3444 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
3445 &p_slot, split);
3446}
3447
3448/*
3449 * mas_push_data() - Instead of splitting a node, it is beneficial to push the
3450 * data to the right or left node if there is room.
3451 * @mas: The maple state
3452 * @height: The current height of the maple state
3453 * @mast: The maple subtree state
3454 * @left: Push left or not.
3455 *
3456 * Keeping the height of the tree low means faster lookups.
3457 *
3458 * Return: True if pushed, false otherwise.
3459 */
3460static inline bool mas_push_data(struct ma_state *mas, int height,
3461 struct maple_subtree_state *mast, bool left)
3462{
3463 unsigned char slot_total = mast->bn->b_end;
3464 unsigned char end, space, split;
3465
3466 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
3467 tmp_mas = *mas;
3468 tmp_mas.depth = mast->l->depth;
3469
3470 if (left && !mas_prev_sibling(&tmp_mas))
3471 return false;
3472 else if (!left && !mas_next_sibling(&tmp_mas))
3473 return false;
3474
3475 end = mas_data_end(&tmp_mas);
3476 slot_total += end;
3477 space = 2 * mt_slot_count(mas->node) - 2;
3478 /* -2 instead of -1 to ensure there isn't a triple split */
3479 if (ma_is_leaf(mast->bn->type))
3480 space--;
3481
3482 if (mas->max == ULONG_MAX)
3483 space--;
3484
3485 if (slot_total >= space)
3486 return false;
3487
3488 /* Get the data; Fill mast->bn */
3489 mast->bn->b_end++;
3490 if (left) {
3491 mab_shift_right(mast->bn, end + 1);
3492 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
3493 mast->bn->b_end = slot_total + 1;
3494 } else {
3495 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
3496 }
3497
3498 /* Configure mast for splitting of mast->bn */
3499 split = mt_slots[mast->bn->type] - 2;
3500 if (left) {
3501 /* Switch mas to prev node */
3502 mat_add(mast->free, mas->node);
3503 *mas = tmp_mas;
3504 /* Start using mast->l for the left side. */
3505 tmp_mas.node = mast->l->node;
3506 *mast->l = tmp_mas;
3507 } else {
3508 mat_add(mast->free, tmp_mas.node);
3509 tmp_mas.node = mast->r->node;
3510 *mast->r = tmp_mas;
3511 split = slot_total - split;
3512 }
3513 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
3514 /* Update parent slot for split calculation. */
3515 if (left)
3516 mast->orig_l->offset += end + 1;
3517
3518 mast_split_data(mast, mas, split);
3519 mast_fill_bnode(mast, mas, 2);
3520 mas_split_final_node(mast, mas, height + 1);
3521 return true;
3522}
3523
3524/*
3525 * mas_split() - Split data that is too big for one node into two.
3526 * @mas: The maple state
3527 * @b_node: The maple big node
3528 * Return: 1 on success, 0 on failure.
3529 */
3530static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
3531{
3532 struct maple_subtree_state mast;
3533 int height = 0;
3534 unsigned char mid_split, split = 0;
3535
3536 /*
3537 * Splitting is handled differently from any other B-tree; the Maple
3538 * Tree splits upwards. Splitting up means that the split operation
3539 * occurs when the walk of the tree hits the leaves and not on the way
3540 * down. The reason for splitting up is that it is impossible to know
3541 * how much space will be needed until the leaf is (or leaves are)
3542 * reached. Since overwriting data is allowed and a range could
3543 * overwrite more than one range or result in changing one entry into 3
3544 * entries, it is impossible to know if a split is required until the
3545 * data is examined.
3546 *
3547 * Splitting is a balancing act between keeping allocations to a minimum
3548 * and avoiding a 'jitter' event where a tree is expanded to make room
3549 * for an entry followed by a contraction when the entry is removed. To
3550 * accomplish the balance, there are empty slots remaining in both left
3551 * and right nodes after a split.
3552 */
3553 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3554 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3555 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
3556 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
3557 MA_TOPIARY(mat, mas->tree);
3558
3559 trace_ma_op(__func__, mas);
3560 mas->depth = mas_mt_height(mas);
3561 /* Allocation failures will happen early. */
3562 mas_node_count(mas, 1 + mas->depth * 2);
3563 if (mas_is_err(mas))
3564 return 0;
3565
3566 mast.l = &l_mas;
3567 mast.r = &r_mas;
3568 mast.orig_l = &prev_l_mas;
3569 mast.orig_r = &prev_r_mas;
3570 mast.free = &mat;
3571 mast.bn = b_node;
3572
3573 while (height++ <= mas->depth) {
3574 if (mt_slots[b_node->type] > b_node->b_end) {
3575 mas_split_final_node(&mast, mas, height);
3576 break;
3577 }
3578
3579 l_mas = r_mas = *mas;
3580 l_mas.node = mas_new_ma_node(mas, b_node);
3581 r_mas.node = mas_new_ma_node(mas, b_node);
3582 /*
3583 * Another way that 'jitter' is avoided is to terminate a split up early if the
3584 * left or right node has space to spare. This is referred to as "pushing left"
3585 * or "pushing right" and is similar to the B* tree, except the nodes left or
3586		 * right can rarely be reused due to RCU.  The ripple upwards is still
3587		 * halted, which is a significant saving.
3588 */
3589 /* Try to push left. */
3590 if (mas_push_data(mas, height, &mast, true))
3591 break;
3592
3593 /* Try to push right. */
3594 if (mas_push_data(mas, height, &mast, false))
3595 break;
3596
3597 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
3598 mast_split_data(&mast, mas, split);
3599 /*
3600 * Usually correct, mab_mas_cp in the above call overwrites
3601 * r->max.
3602 */
3603 mast.r->max = mas->max;
3604 mast_fill_bnode(&mast, mas, 1);
3605 prev_l_mas = *mast.l;
3606 prev_r_mas = *mast.r;
3607 }
3608
3609 /* Set the original node as dead */
3610 mat_add(mast.free, mas->node);
3611 mas->node = l_mas.node;
3612 mas_wmb_replace(mas, mast.free, NULL);
3613 mtree_range_walk(mas);
3614 return 1;
3615}
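/*
 * Illustrative sketch (not part of the original source): from the outside,
 * mas_split() is what absorbs a leaf that has been filled past its slot
 * count.  The tree name, number of stores and values below are hypothetical;
 * they are only meant to exceed MAPLE_RANGE64_SLOTS so that at least one leaf
 * split (or push left/right) must occur.
 */
#if 0
static DEFINE_MTREE(split_example_tree);

static void example_force_leaf_split(void)
{
	unsigned long i;

	for (i = 0; i <= 2 * MAPLE_RANGE64_SLOTS; i++)
		mtree_store(&split_example_tree, i * 10, xa_mk_value(i),
			    GFP_KERNEL);

	mtree_destroy(&split_example_tree);
}
#endif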
3616
3617/*
3618 * mas_reuse_node() - Reuse the node to store the data.
3619 * @wr_mas: The maple write state
3620 * @bn: The maple big node
3621 * @end: The end of the data.
3622 *
3623 * Will always return false in RCU mode.
3624 *
3625 * Return: True if node was reused, false otherwise.
3626 */
3627static inline bool mas_reuse_node(struct ma_wr_state *wr_mas,
3628 struct maple_big_node *bn, unsigned char end)
3629{
3630 /* Need to be rcu safe. */
3631 if (mt_in_rcu(wr_mas->mas->tree))
3632 return false;
3633
3634 if (end > bn->b_end) {
3635 int clear = mt_slots[wr_mas->type] - bn->b_end;
3636
3637 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--);
3638 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear);
3639 }
3640 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false);
3641 return true;
3642}
3643
3644/*
3645 * mas_commit_b_node() - Commit the big node into the tree.
3646 * @wr_mas: The maple write state
3647 * @b_node: The maple big node
3648 * @end: The end of the data.
3649 */
3650static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas,
3651 struct maple_big_node *b_node, unsigned char end)
3652{
3653 struct maple_node *node;
3654 unsigned char b_end = b_node->b_end;
3655 enum maple_type b_type = b_node->type;
3656
3657 if ((b_end < mt_min_slots[b_type]) &&
3658 (!mte_is_root(wr_mas->mas->node)) &&
3659 (mas_mt_height(wr_mas->mas) > 1))
3660 return mas_rebalance(wr_mas->mas, b_node);
3661
3662 if (b_end >= mt_slots[b_type])
3663 return mas_split(wr_mas->mas, b_node);
3664
3665 if (mas_reuse_node(wr_mas, b_node, end))
3666 goto reuse_node;
3667
3668 mas_node_count(wr_mas->mas, 1);
3669 if (mas_is_err(wr_mas->mas))
3670 return 0;
3671
3672 node = mas_pop_node(wr_mas->mas);
3673 node->parent = mas_mn(wr_mas->mas)->parent;
3674 wr_mas->mas->node = mt_mk_node(node, b_type);
3675	mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
3676 mas_replace(wr_mas->mas, false);
3677reuse_node:
3678 mas_update_gap(wr_mas->mas);
3679 return 1;
3680}
3681
3682/*
3683 * mas_root_expand() - Expand a root to a node
3684 * @mas: The maple state
3685 * @entry: The entry to store into the tree
3686 */
3687static inline int mas_root_expand(struct ma_state *mas, void *entry)
3688{
3689 void *contents = mas_root_locked(mas);
3690 enum maple_type type = maple_leaf_64;
3691 struct maple_node *node;
3692 void __rcu **slots;
3693 unsigned long *pivots;
3694 int slot = 0;
3695
3696 mas_node_count(mas, 1);
3697 if (unlikely(mas_is_err(mas)))
3698 return 0;
3699
3700 node = mas_pop_node(mas);
3701 pivots = ma_pivots(node, type);
3702 slots = ma_slots(node, type);
3703 node->parent = ma_parent_ptr(
3704 ((unsigned long)mas->tree | MA_ROOT_PARENT));
3705 mas->node = mt_mk_node(node, type);
3706
3707 if (mas->index) {
3708 if (contents) {
3709 rcu_assign_pointer(slots[slot], contents);
3710 if (likely(mas->index > 1))
3711 slot++;
3712 }
3713 pivots[slot++] = mas->index - 1;
3714 }
3715
3716 rcu_assign_pointer(slots[slot], entry);
3717 mas->offset = slot;
3718 pivots[slot] = mas->last;
3719 if (mas->last != ULONG_MAX)
3720 slot++;
3721 mas->depth = 1;
3722 mas_set_height(mas);
3723	ma_set_meta(node, maple_leaf_64, 0, slot);
3724 /* swap the new root into the tree */
3725 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3726 return slot;
3727}
3728
3729static inline void mas_store_root(struct ma_state *mas, void *entry)
3730{
3731 if (likely((mas->last != 0) || (mas->index != 0)))
3732 mas_root_expand(mas, entry);
3733 else if (((unsigned long) (entry) & 3) == 2)
3734 mas_root_expand(mas, entry);
3735 else {
3736 rcu_assign_pointer(mas->tree->ma_root, entry);
3737 mas->node = MAS_START;
3738 }
3739}
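/*
 * Illustrative sketch (not part of the original source): what mas_store_root()
 * and mas_root_expand() mean for a user.  A single entry at index 0 is kept
 * directly in ma_root; storing anywhere else expands the root into a
 * maple_leaf_64 node.  The tree name and values are hypothetical.
 */
#if 0
static DEFINE_MTREE(root_example_tree);

static void example_root_store(void)
{
	/* Kept directly in ma_root, no node is allocated. */
	mtree_store(&root_example_tree, 0, xa_mk_value(0), GFP_KERNEL);

	/* Forces mas_root_expand(): the root becomes a maple_leaf_64 node. */
	mtree_store(&root_example_tree, 5, xa_mk_value(5), GFP_KERNEL);

	mtree_destroy(&root_example_tree);
}
#endif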
3740
3741/*
3742 * mas_is_span_wr() - Check if the write needs to be treated as a write that
3743 * spans the node.
3744 * @wr_mas: The maple write state.  The relevant fields are the maple
3745 *          state (mas), the pivot value being written (r_max),
3746 *          the maple node type (type),
3747 *          and the data to write (entry).
3748 *
3749 * Spanning writes are writes that start in one node and end in another, or
3750 * writes of a %NULL that will cause the node to end with a %NULL.
3751 *
3752 * Return: True if this is a spanning write, false otherwise.
3753 */
3754static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3755{
3756 unsigned long max;
3757 unsigned long last = wr_mas->mas->last;
3758 unsigned long piv = wr_mas->r_max;
3759 enum maple_type type = wr_mas->type;
3760 void *entry = wr_mas->entry;
3761
3762 /* Contained in this pivot */
3763 if (piv > last)
3764 return false;
3765
3766 max = wr_mas->mas->max;
3767 if (unlikely(ma_is_leaf(type))) {
3768 /* Fits in the node, but may span slots. */
3769 if (last < max)
3770 return false;
3771
3772 /* Writes to the end of the node but not null. */
3773 if ((last == max) && entry)
3774 return false;
3775
3776 /*
3777 * Writing ULONG_MAX is not a spanning write regardless of the
3778 * value being written as long as the range fits in the node.
3779 */
3780 if ((last == ULONG_MAX) && (last == max))
3781 return false;
3782 } else if (piv == last) {
3783 if (entry)
3784 return false;
3785
3786 /* Detect spanning store wr walk */
3787 if (last == ULONG_MAX)
3788 return false;
3789 }
3790
3791 trace_ma_write(__func__, wr_mas->mas, piv, entry);
3792
3793 return true;
3794}
3795
3796static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3797{
3798 wr_mas->type = mte_node_type(wr_mas->mas->node);
3799 mas_wr_node_walk(wr_mas);
3800 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3801}
3802
3803static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3804{
3805 wr_mas->mas->max = wr_mas->r_max;
3806 wr_mas->mas->min = wr_mas->r_min;
3807 wr_mas->mas->node = wr_mas->content;
3808 wr_mas->mas->offset = 0;
3809 wr_mas->mas->depth++;
3810}
3811/*
3812 * mas_wr_walk() - Walk the tree for a write.
3813 * @wr_mas: The maple write state
3814 *
3815 * Uses mas_slot_locked() and does not need to worry about dead nodes.
3816 *
3817 * Return: True if it's contained in a node, false on spanning write.
3818 */
3819static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3820{
3821 struct ma_state *mas = wr_mas->mas;
3822
3823 while (true) {
3824 mas_wr_walk_descend(wr_mas);
3825 if (unlikely(mas_is_span_wr(wr_mas)))
3826 return false;
3827
3828 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3829 mas->offset);
3830 if (ma_is_leaf(wr_mas->type))
3831 return true;
3832
3833 mas_wr_walk_traverse(wr_mas);
3834 }
3835
3836 return true;
3837}
3838
3839static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
3840{
3841 struct ma_state *mas = wr_mas->mas;
3842
3843 while (true) {
3844 mas_wr_walk_descend(wr_mas);
3845 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3846 mas->offset);
3847 if (ma_is_leaf(wr_mas->type))
3848 return true;
3849 mas_wr_walk_traverse(wr_mas);
3850
3851 }
3852 return true;
3853}
3854/*
3855 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3856 * @l_wr_mas: The left maple write state
3857 * @r_wr_mas: The right maple write state
3858 */
3859static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3860 struct ma_wr_state *r_wr_mas)
3861{
3862 struct ma_state *r_mas = r_wr_mas->mas;
3863 struct ma_state *l_mas = l_wr_mas->mas;
3864 unsigned char l_slot;
3865
3866 l_slot = l_mas->offset;
3867 if (!l_wr_mas->content)
3868 l_mas->index = l_wr_mas->r_min;
3869
3870 if ((l_mas->index == l_wr_mas->r_min) &&
3871 (l_slot &&
3872 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3873 if (l_slot > 1)
3874 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3875 else
3876 l_mas->index = l_mas->min;
3877
3878 l_mas->offset = l_slot - 1;
3879 }
3880
3881 if (!r_wr_mas->content) {
3882 if (r_mas->last < r_wr_mas->r_max)
3883 r_mas->last = r_wr_mas->r_max;
3884 r_mas->offset++;
3885 } else if ((r_mas->last == r_wr_mas->r_max) &&
3886 (r_mas->last < r_mas->max) &&
3887 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3888 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3889 r_wr_mas->type, r_mas->offset + 1);
3890 r_mas->offset++;
3891 }
3892}
3893
3894static inline void *mas_state_walk(struct ma_state *mas)
3895{
3896 void *entry;
3897
3898 entry = mas_start(mas);
3899 if (mas_is_none(mas))
3900 return NULL;
3901
3902 if (mas_is_ptr(mas))
3903 return entry;
3904
3905 return mtree_range_walk(mas);
3906}
3907
3908/*
3909 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
3910 * to date.
3911 *
3912 * @mas: The maple state.
3913 *
3914 * Note: Leaves mas in undesirable state.
3915 * Return: The entry for @mas->index or %NULL on dead node.
3916 */
3917static inline void *mtree_lookup_walk(struct ma_state *mas)
3918{
3919 unsigned long *pivots;
3920 unsigned char offset;
3921 struct maple_node *node;
3922 struct maple_enode *next;
3923 enum maple_type type;
3924 void __rcu **slots;
3925 unsigned char end;
3926 unsigned long max;
3927
3928 next = mas->node;
3929 max = ULONG_MAX;
3930 do {
3931 offset = 0;
3932 node = mte_to_node(next);
3933 type = mte_node_type(next);
3934 pivots = ma_pivots(node, type);
3935 end = ma_data_end(node, type, pivots, max);
3936 if (unlikely(ma_dead_node(node)))
3937 goto dead_node;
3938 do {
3939 if (pivots[offset] >= mas->index) {
3940 max = pivots[offset];
3941 break;
3942 }
3943 } while (++offset < end);
3944
3945 slots = ma_slots(node, type);
3946 next = mt_slot(mas->tree, slots, offset);
3947 if (unlikely(ma_dead_node(node)))
3948 goto dead_node;
3949 } while (!ma_is_leaf(type));
3950
3951 return (void *)next;
3952
3953dead_node:
3954 mas_reset(mas);
3955 return NULL;
3956}
3957
3958/*
3959 * mas_new_root() - Create a new root node that only contains the entry passed
3960 * in.
3961 * @mas: The maple state
3962 * @entry: The entry to store.
3963 *
3964 * Only valid when mas->index == 0 and mas->last == ULONG_MAX.
3965 *
3966 * Return: 0 on error, 1 on success.
3967 */
3968static inline int mas_new_root(struct ma_state *mas, void *entry)
3969{
3970 struct maple_enode *root = mas_root_locked(mas);
3971 enum maple_type type = maple_leaf_64;
3972 struct maple_node *node;
3973 void __rcu **slots;
3974 unsigned long *pivots;
3975
3976 if (!entry && !mas->index && mas->last == ULONG_MAX) {
3977 mas->depth = 0;
3978 mas_set_height(mas);
3979 rcu_assign_pointer(mas->tree->ma_root, entry);
3980 mas->node = MAS_START;
3981 goto done;
3982 }
3983
3984 mas_node_count(mas, 1);
3985 if (mas_is_err(mas))
3986 return 0;
3987
3988 node = mas_pop_node(mas);
3989 pivots = ma_pivots(node, type);
3990 slots = ma_slots(node, type);
3991 node->parent = ma_parent_ptr(
3992 ((unsigned long)mas->tree | MA_ROOT_PARENT));
3993 mas->node = mt_mk_node(node, type);
3994 rcu_assign_pointer(slots[0], entry);
3995 pivots[0] = mas->last;
3996 mas->depth = 1;
3997 mas_set_height(mas);
3998 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3999
4000done:
4001 if (xa_is_node(root))
4002 mte_destroy_walk(root, mas->tree);
4003
4004 return 1;
4005}
4006/*
4007 * mas_wr_spanning_store() - Create a subtree with the store operation completed
4008 * and new nodes where necessary, then place the sub-tree in the actual tree.
4009 * Note that mas is expected to point to the node which caused the store to
4010 * span.
4011 * @wr_mas: The maple write state
4012 *
4013 * Return: 0 on error, positive on success.
4014 */
4015static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
4016{
4017 struct maple_subtree_state mast;
4018 struct maple_big_node b_node;
4019 struct ma_state *mas;
4020 unsigned char height;
4021
4022 /* Left and Right side of spanning store */
4023 MA_STATE(l_mas, NULL, 0, 0);
4024 MA_STATE(r_mas, NULL, 0, 0);
4025
4026 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
4027 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
4028
4029 /*
4030 * A store operation that spans multiple nodes is called a spanning
4031 * store and is handled early in the store call stack by the function
4032 * mas_is_span_wr(). When a spanning store is identified, the maple
4033 * state is duplicated. The first maple state walks the left tree path
4034 * to ``index``, the duplicate walks the right tree path to ``last``.
4035 * The data in the two nodes are combined into a single node, two nodes,
4036 * or possibly three nodes (see the 3-way split above). A ``NULL``
4037 * written to the last entry of a node is considered a spanning store as
4038 * a rebalance is required for the operation to complete and an overflow
4039 * of data may happen.
4040 */
4041 mas = wr_mas->mas;
4042 trace_ma_op(__func__, mas);
4043
4044 if (unlikely(!mas->index && mas->last == ULONG_MAX))
4045 return mas_new_root(mas, wr_mas->entry);
4046 /*
4047 * Node rebalancing may occur due to this store, so there may be three new
4048 * entries per level plus a new root.
4049 */
4050 height = mas_mt_height(mas);
4051 mas_node_count(mas, 1 + height * 3);
4052 if (mas_is_err(mas))
4053 return 0;
4054
4055 /*
4056 * Set up right side. Need to get to the next offset after the spanning
4057 * store to ensure it's not NULL and to combine both the next node and
4058 * the node with the start together.
4059 */
4060 r_mas = *mas;
4061 /* Avoid overflow, walk to next slot in the tree. */
4062 if (r_mas.last + 1)
4063 r_mas.last++;
4064
4065 r_mas.index = r_mas.last;
4066 mas_wr_walk_index(&r_wr_mas);
4067 r_mas.last = r_mas.index = mas->last;
4068
4069 /* Set up left side. */
4070 l_mas = *mas;
4071 mas_wr_walk_index(&l_wr_mas);
4072
4073 if (!wr_mas->entry) {
4074 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
4075 mas->offset = l_mas.offset;
4076 mas->index = l_mas.index;
4077 mas->last = l_mas.last = r_mas.last;
4078 }
4079
4080 /* expanding NULLs may make this cover the entire range */
4081 if (!l_mas.index && r_mas.last == ULONG_MAX) {
4082 mas_set_range(mas, 0, ULONG_MAX);
4083 return mas_new_root(mas, wr_mas->entry);
4084 }
4085
4086 memset(&b_node, 0, sizeof(struct maple_big_node));
4087 /* Copy l_mas and store the value in b_node. */
4088 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
4089 /* Copy r_mas into b_node. */
4090 if (r_mas.offset <= r_wr_mas.node_end)
4091 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
4092 &b_node, b_node.b_end + 1);
4093 else
4094 b_node.b_end++;
4095
4096 /* Stop spanning searches by searching for just index. */
4097 l_mas.index = l_mas.last = mas->index;
4098
4099 mast.bn = &b_node;
4100 mast.orig_l = &l_mas;
4101 mast.orig_r = &r_mas;
4102 /* Combine l_mas and r_mas and split them up evenly again. */
4103 return mas_spanning_rebalance(mas, &mast, height + 1);
4104}
4105
4106/*
4107 * mas_wr_node_store() - Attempt to store the value in a node
4108 * @wr_mas: The maple write state
4109 *
4110 * Attempts to reuse the node, but may allocate.
4111 *
4112 * Return: True if stored, false otherwise
4113 */
4114static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
4115{
4116 struct ma_state *mas = wr_mas->mas;
4117 void __rcu **dst_slots;
4118 unsigned long *dst_pivots;
4119 unsigned char dst_offset;
4120 unsigned char new_end = wr_mas->node_end;
4121 unsigned char offset;
4122 unsigned char node_slots = mt_slots[wr_mas->type];
4123 struct maple_node reuse, *newnode;
4124 unsigned char copy_size, max_piv = mt_pivots[wr_mas->type];
4125 bool in_rcu = mt_in_rcu(mas->tree);
4126
4127 offset = mas->offset;
4128 if (mas->last == wr_mas->r_max) {
4129 /* runs right to the end of the node */
4130 if (mas->last == mas->max)
4131 new_end = offset;
4132 /* don't copy this offset */
4133 wr_mas->offset_end++;
4134 } else if (mas->last < wr_mas->r_max) {
4135 /* new range ends in this range */
4136 if (unlikely(wr_mas->r_max == ULONG_MAX))
4137 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type);
4138
4139 new_end++;
4140 } else {
4141 if (wr_mas->end_piv == mas->last)
4142 wr_mas->offset_end++;
4143
4144 new_end -= wr_mas->offset_end - offset - 1;
4145 }
4146
4147 /* new range starts within a range */
4148 if (wr_mas->r_min < mas->index)
4149 new_end++;
4150
4151 /* Not enough room */
4152 if (new_end >= node_slots)
4153 return false;
4154
4155 /* Not enough data. */
4156 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) &&
4157 !(mas->mas_flags & MA_STATE_BULK))
4158 return false;
4159
4160 /* set up node. */
4161 if (in_rcu) {
4162 mas_node_count(mas, 1);
4163 if (mas_is_err(mas))
4164 return false;
4165
4166 newnode = mas_pop_node(mas);
4167 } else {
4168 memset(&reuse, 0, sizeof(struct maple_node));
4169 newnode = &reuse;
4170 }
4171
4172 newnode->parent = mas_mn(mas)->parent;
4173 dst_pivots = ma_pivots(newnode, wr_mas->type);
4174 dst_slots = ma_slots(newnode, wr_mas->type);
4175 /* Copy from start to insert point */
4176 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1));
4177 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1));
4178 dst_offset = offset;
4179
4180 /* Handle insert of new range starting after old range */
4181 if (wr_mas->r_min < mas->index) {
4182 mas->offset++;
4183 rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content);
4184 dst_pivots[dst_offset++] = mas->index - 1;
4185 }
4186
4187 /* Store the new entry and range end. */
4188 if (dst_offset < max_piv)
4189 dst_pivots[dst_offset] = mas->last;
4190 mas->offset = dst_offset;
4191 rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry);
4192
4193 /*
4194 * this range wrote to the end of the node or it overwrote the rest of
4195 * the data
4196 */
4197 if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) {
4198 new_end = dst_offset;
4199 goto done;
4200 }
4201
4202 dst_offset++;
4203 /* Copy to the end of node if necessary. */
4204 copy_size = wr_mas->node_end - wr_mas->offset_end + 1;
4205 memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end,
4206 sizeof(void *) * copy_size);
4207 if (dst_offset < max_piv) {
4208 if (copy_size > max_piv - dst_offset)
4209 copy_size = max_piv - dst_offset;
4210
4211 memcpy(dst_pivots + dst_offset,
4212 wr_mas->pivots + wr_mas->offset_end,
4213 sizeof(unsigned long) * copy_size);
4214 }
4215
4216 if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1))
4217 dst_pivots[new_end] = mas->max;
4218
4219done:
4220 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
4221 if (in_rcu) {
4222 mte_set_node_dead(mas->node);
4223 mas->node = mt_mk_node(newnode, wr_mas->type);
4224 mas_replace(mas, false);
4225 } else {
4226 memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
4227 }
4228 trace_ma_write(__func__, mas, 0, wr_mas->entry);
4229 mas_update_gap(mas);
4230 return true;
4231}
4232
4233/*
4234 * mas_wr_slot_store() - Attempt to store a value in a slot.
4235 * @wr_mas: the maple write state
4236 *
4237 * Return: True if stored, false otherwise
4238 */
4239static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
4240{
4241 struct ma_state *mas = wr_mas->mas;
4242 unsigned long lmax; /* Logical max. */
4243 unsigned char offset = mas->offset;
4244
4245 if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
4246 (offset != wr_mas->node_end)))
4247 return false;
4248
4249 if (offset == wr_mas->node_end - 1)
4250 lmax = mas->max;
4251 else
4252 lmax = wr_mas->pivots[offset + 1];
4253
4254 /* going to overwrite too many slots. */
4255 if (lmax < mas->last)
4256 return false;
4257
4258 if (wr_mas->r_min == mas->index) {
4259 /* overwriting two or more ranges with one. */
4260 if (lmax == mas->last)
4261 return false;
4262
4263 /* Overwriting all of offset and a portion of offset + 1. */
4264 rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
4265 wr_mas->pivots[offset] = mas->last;
4266 goto done;
4267 }
4268
4269 /* Doesn't end on the next range end. */
4270 if (lmax != mas->last)
4271 return false;
4272
4273 /* Overwriting a portion of offset and all of offset + 1 */
4274 if ((offset + 1 < mt_pivots[wr_mas->type]) &&
4275 (wr_mas->entry || wr_mas->pivots[offset + 1]))
4276 wr_mas->pivots[offset + 1] = mas->last;
4277
4278 rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
4279 wr_mas->pivots[offset] = mas->index - 1;
4280 mas->offset++; /* Keep mas accurate. */
4281
4282done:
4283 trace_ma_write(__func__, mas, 0, wr_mas->entry);
4284 mas_update_gap(mas);
4285 return true;
4286}
4287
4288static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
4289{
4290 while ((wr_mas->mas->last > wr_mas->end_piv) &&
4291 (wr_mas->offset_end < wr_mas->node_end))
4292 wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end];
4293
4294 if (wr_mas->mas->last > wr_mas->end_piv)
4295 wr_mas->end_piv = wr_mas->mas->max;
4296}
4297
4298static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
4299{
4300 struct ma_state *mas = wr_mas->mas;
4301
4302 if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end])
4303 mas->last = wr_mas->end_piv;
4304
4305 /* Check next slot(s) if we are overwriting the end */
4306 if ((mas->last == wr_mas->end_piv) &&
4307 (wr_mas->node_end != wr_mas->offset_end) &&
4308 !wr_mas->slots[wr_mas->offset_end + 1]) {
4309 wr_mas->offset_end++;
4310 if (wr_mas->offset_end == wr_mas->node_end)
4311 mas->last = mas->max;
4312 else
4313 mas->last = wr_mas->pivots[wr_mas->offset_end];
4314 wr_mas->end_piv = mas->last;
4315 }
4316
4317 if (!wr_mas->content) {
4318 /* If this one is null, the next and prev are not */
4319 mas->index = wr_mas->r_min;
4320 } else {
4321 /* Check prev slot if we are overwriting the start */
4322 if (mas->index == wr_mas->r_min && mas->offset &&
4323 !wr_mas->slots[mas->offset - 1]) {
4324 mas->offset--;
4325 wr_mas->r_min = mas->index =
4326 mas_safe_min(mas, wr_mas->pivots, mas->offset);
4327 wr_mas->r_max = wr_mas->pivots[mas->offset];
4328 }
4329 }
4330}
4331
4332static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
4333{
4334 unsigned char end = wr_mas->node_end;
4335 unsigned char new_end = end + 1;
4336 struct ma_state *mas = wr_mas->mas;
4337 unsigned char node_pivots = mt_pivots[wr_mas->type];
4338
4339 if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) {
4340 if (new_end < node_pivots)
4341 wr_mas->pivots[new_end] = wr_mas->pivots[end];
4342
4343 if (new_end < node_pivots)
4344 ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4345
4346 rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry);
4347 mas->offset = new_end;
4348 wr_mas->pivots[end] = mas->index - 1;
4349
4350 return true;
4351 }
4352
4353 if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) {
4354 if (new_end < node_pivots)
4355 wr_mas->pivots[new_end] = wr_mas->pivots[end];
4356
4357 rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content);
4358 if (new_end < node_pivots)
4359 ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4360
4361 wr_mas->pivots[end] = mas->last;
4362 rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry);
4363 return true;
4364 }
4365
4366 return false;
4367}
4368
4369/*
4370 * mas_wr_bnode() - Slow path for a modification.
4371 * @wr_mas: The write maple state
4372 *
4373 * This is where splits and rebalances end up.
4374 */
4375static void mas_wr_bnode(struct ma_wr_state *wr_mas)
4376{
4377 struct maple_big_node b_node;
4378
4379 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
4380 memset(&b_node, 0, sizeof(struct maple_big_node));
4381 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
4382 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end);
4383}
4384
4385static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
4386{
4387 unsigned char node_slots;
4388 unsigned char node_size;
4389 struct ma_state *mas = wr_mas->mas;
4390
4391 /* Direct replacement */
4392 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) {
4393 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
4394 if (!!wr_mas->entry ^ !!wr_mas->content)
4395 mas_update_gap(mas);
4396 return;
4397 }
4398
4399 /* Attempt to append */
4400 node_slots = mt_slots[wr_mas->type];
4401 node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2;
4402 if (mas->max == ULONG_MAX)
4403 node_size++;
4404
4405 /* slot and node store will not fit, go to the slow path */
4406 if (unlikely(node_size >= node_slots))
4407 goto slow_path;
4408
4409 if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) &&
4410 (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) {
4411 if (!wr_mas->content || !wr_mas->entry)
4412 mas_update_gap(mas);
4413 return;
4414 }
4415
4416 if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
4417 return;
4418 else if (mas_wr_node_store(wr_mas))
4419 return;
4420
4421 if (mas_is_err(mas))
4422 return;
4423
4424slow_path:
4425 mas_wr_bnode(wr_mas);
4426}
4427
4428/*
4429 * mas_wr_store_entry() - Internal call to store a value
4430 * @wr_mas: The maple write state, which carries the maple state and the
4431 * entry to store
4432 *
4433 * Return: The contents that were previously stored at the index.
4434 */
4435static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas)
4436{
4437 struct ma_state *mas = wr_mas->mas;
4438
4439 wr_mas->content = mas_start(mas);
4440 if (mas_is_none(mas) || mas_is_ptr(mas)) {
4441 mas_store_root(mas, wr_mas->entry);
4442 return wr_mas->content;
4443 }
4444
4445 if (unlikely(!mas_wr_walk(wr_mas))) {
4446 mas_wr_spanning_store(wr_mas);
4447 return wr_mas->content;
4448 }
4449
4450 /* At this point, we are at the leaf node that needs to be altered. */
4451 wr_mas->end_piv = wr_mas->r_max;
4452 mas_wr_end_piv(wr_mas);
4453
4454 if (!wr_mas->entry)
4455 mas_wr_extend_null(wr_mas);
4456
4457 /* New root for a single pointer */
4458 if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
4459 mas_new_root(mas, wr_mas->entry);
4460 return wr_mas->content;
4461 }
4462
4463 mas_wr_modify(wr_mas);
4464 return wr_mas->content;
4465}
4466
4467/**
4468 * mas_insert() - Internal call to insert a value
4469 * @mas: The maple state
4470 * @entry: The entry to store
4471 *
4472 * Return: %NULL on successful insert, or the contents that already exist at
4473 * the requested index. The maple state needs to be checked for error conditions.
4474 */
4475static inline void *mas_insert(struct ma_state *mas, void *entry)
4476{
4477 MA_WR_STATE(wr_mas, mas, entry);
4478
4479 /*
4480 * Inserting a new range inserts either 0, 1, or 2 pivots within the
4481 * tree. If the insert fits exactly into an existing gap with a value
4482 * of NULL, then the slot only needs to be written with the new value.
4483 * If the range being inserted is adjacent to another range, then only a
4484 * single pivot needs to be inserted (as well as writing the entry). If
4485 * the new range is within a gap but does not touch any other ranges,
4486 * then two pivots need to be inserted: the start - 1, and the end. As
4487 * usual, the entry must be written. Most operations require a new node
4488 * to be allocated and replace an existing node to ensure RCU safety,
4489 * when in RCU mode. The exception to requiring a newly allocated node
4490 * is when inserting at the end of a node (appending). When done
4491 * carefully, appending can reuse the node in place.
4492 */
4493 wr_mas.content = mas_start(mas);
4494 if (wr_mas.content)
4495 goto exists;
4496
4497 if (mas_is_none(mas) || mas_is_ptr(mas)) {
4498 mas_store_root(mas, entry);
4499 return NULL;
4500 }
4501
4502 /* spanning writes always overwrite something */
4503 if (!mas_wr_walk(&wr_mas))
4504 goto exists;
4505
4506 /* At this point, we are at the leaf node that needs to be altered. */
4507 wr_mas.offset_end = mas->offset;
4508 wr_mas.end_piv = wr_mas.r_max;
4509
4510 if (wr_mas.content || (mas->last > wr_mas.r_max))
4511 goto exists;
4512
4513 if (!entry)
4514 return NULL;
4515
4516 mas_wr_modify(&wr_mas);
4517 return wr_mas.content;
4518
4519exists:
4520 mas_set_err(mas, -EEXIST);
4521 return wr_mas.content;
4522
4523}
4524
4525/*
4526 * mas_prev_node() - Find the previous non-null entry at the same level in
4527 * the tree.
4528 * @mas: The maple state
4529 * @min: The lower limit to search
4530 *
4531 * The prev node value will be mas->node[mas->offset] or MAS_NONE.
4532 * Return: 1 if the node is dead, 0 otherwise.
4533 */
4534static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
4535{
4536 enum maple_type mt;
4537 int offset, level;
4538 void __rcu **slots;
4539 struct maple_node *node;
4540 struct maple_enode *enode;
4541 unsigned long *pivots;
4542
4543 if (mas_is_none(mas))
4544 return 0;
4545
4546 level = 0;
4547 do {
4548 node = mas_mn(mas);
4549 if (ma_is_root(node))
4550 goto no_entry;
4551
4552 /* Walk up. */
4553 if (unlikely(mas_ascend(mas)))
4554 return 1;
4555 offset = mas->offset;
4556 level++;
4557 } while (!offset);
4558
4559 offset--;
4560 mt = mte_node_type(mas->node);
4561 node = mas_mn(mas);
4562 slots = ma_slots(node, mt);
4563 pivots = ma_pivots(node, mt);
4564 if (unlikely(ma_dead_node(node)))
4565 return 1;
4566
4567 mas->max = pivots[offset];
4568 if (offset)
4569 mas->min = pivots[offset - 1] + 1;
4570 if (unlikely(ma_dead_node(node)))
4571 return 1;
4572
4573 if (mas->max < min)
4574 goto no_entry_min;
4575
4576 while (level > 1) {
4577 level--;
4578 enode = mas_slot(mas, slots, offset);
4579 if (unlikely(ma_dead_node(node)))
4580 return 1;
4581
4582 mas->node = enode;
4583 mt = mte_node_type(mas->node);
4584 node = mas_mn(mas);
4585 slots = ma_slots(node, mt);
4586 pivots = ma_pivots(node, mt);
4587 offset = ma_data_end(node, mt, pivots, mas->max);
4588 if (unlikely(ma_dead_node(node)))
4589 return 1;
4590
4591 if (offset)
4592 mas->min = pivots[offset - 1] + 1;
4593
4594 if (offset < mt_pivots[mt])
4595 mas->max = pivots[offset];
4596
4597 if (mas->max < min)
4598 goto no_entry;
4599 }
4600
4601 mas->node = mas_slot(mas, slots, offset);
4602 if (unlikely(ma_dead_node(node)))
4603 return 1;
4604
4605 mas->offset = mas_data_end(mas);
4606 if (unlikely(mte_dead_node(mas->node)))
4607 return 1;
4608
4609 return 0;
4610
4611no_entry_min:
4612 mas->offset = offset;
4613 if (offset)
4614 mas->min = pivots[offset - 1] + 1;
4615no_entry:
4616 if (unlikely(ma_dead_node(node)))
4617 return 1;
4618
4619 mas->node = MAS_NONE;
4620 return 0;
4621}
4622
4623/*
4624 * mas_next_node() - Get the next node at the same level in the tree.
4625 * @mas: The maple state
4626 * @max: The maximum pivot value to check.
4627 *
4628 * The next value will be mas->node[mas->offset] or MAS_NONE.
4629 * Return: 1 on dead node, 0 otherwise.
4630 */
4631static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
4632 unsigned long max)
4633{
4634 unsigned long min, pivot;
4635 unsigned long *pivots;
4636 struct maple_enode *enode;
4637 int level = 0;
4638 unsigned char offset;
4639 unsigned char node_end;
4640 enum maple_type mt;
4641 void __rcu **slots;
4642
4643 if (mas->max >= max)
4644 goto no_entry;
4645
4646 level = 0;
4647 do {
4648 if (ma_is_root(node))
4649 goto no_entry;
4650
4651 min = mas->max + 1;
4652 if (min > max)
4653 goto no_entry;
4654
4655 if (unlikely(mas_ascend(mas)))
4656 return 1;
4657
4658 offset = mas->offset;
4659 level++;
4660 node = mas_mn(mas);
4661 mt = mte_node_type(mas->node);
4662 pivots = ma_pivots(node, mt);
4663 node_end = ma_data_end(node, mt, pivots, mas->max);
4664 if (unlikely(ma_dead_node(node)))
4665 return 1;
4666
4667 } while (unlikely(offset == node_end));
4668
4669 slots = ma_slots(node, mt);
4670 pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
4671 while (unlikely(level > 1)) {
4672 /* Descend, if necessary */
4673 enode = mas_slot(mas, slots, offset);
4674 if (unlikely(ma_dead_node(node)))
4675 return 1;
4676
4677 mas->node = enode;
4678 level--;
4679 node = mas_mn(mas);
4680 mt = mte_node_type(mas->node);
4681 slots = ma_slots(node, mt);
4682 pivots = ma_pivots(node, mt);
4683 if (unlikely(ma_dead_node(node)))
4684 return 1;
4685
4686 offset = 0;
4687 pivot = pivots[0];
4688 }
4689
4690 enode = mas_slot(mas, slots, offset);
4691 if (unlikely(ma_dead_node(node)))
4692 return 1;
4693
4694 mas->node = enode;
4695 mas->min = min;
4696 mas->max = pivot;
4697 return 0;
4698
4699no_entry:
4700 if (unlikely(ma_dead_node(node)))
4701 return 1;
4702
4703 mas->node = MAS_NONE;
4704 return 0;
4705}
4706
4707/*
4708 * mas_next_nentry() - Get the next node entry
4709 * @mas: The maple state
4710 * @node: The maple node of type @type being walked
4711 * @max: The maximum value to check
4712 *
4713 * Sets @mas->offset to the offset of the next node entry, @mas->last to the
4714 * pivot of the entry.
4715 *
4716 * Return: The next entry, %NULL otherwise
4717 */
4718static inline void *mas_next_nentry(struct ma_state *mas,
4719 struct maple_node *node, unsigned long max, enum maple_type type)
4720{
4721 unsigned char count;
4722 unsigned long pivot;
4723 unsigned long *pivots;
4724 void __rcu **slots;
4725 void *entry;
4726
4727 if (mas->last == mas->max) {
4728 mas->index = mas->max;
4729 return NULL;
4730 }
4731
4732 slots = ma_slots(node, type);
4733 pivots = ma_pivots(node, type);
4734 count = ma_data_end(node, type, pivots, mas->max);
4735 if (unlikely(ma_dead_node(node)))
4736 return NULL;
4737
4738 mas->index = mas_safe_min(mas, pivots, mas->offset);
4739 if (unlikely(ma_dead_node(node)))
4740 return NULL;
4741
4742 if (mas->index > max)
4743 return NULL;
4744
4745 if (mas->offset > count)
4746 return NULL;
4747
4748 while (mas->offset < count) {
4749 pivot = pivots[mas->offset];
4750 entry = mas_slot(mas, slots, mas->offset);
4751 if (ma_dead_node(node))
4752 return NULL;
4753
4754 if (entry)
4755 goto found;
4756
4757 if (pivot >= max)
4758 return NULL;
4759
4760 mas->index = pivot + 1;
4761 mas->offset++;
4762 }
4763
4764 if (mas->index > mas->max) {
4765 mas->index = mas->last;
4766 return NULL;
4767 }
4768
4769 pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
4770 entry = mas_slot(mas, slots, mas->offset);
4771 if (ma_dead_node(node))
4772 return NULL;
4773
4774 if (!pivot)
4775 return NULL;
4776
4777 if (!entry)
4778 return NULL;
4779
4780found:
4781 mas->last = pivot;
4782 return entry;
4783}
4784
4785static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4786{
4787retry:
4788 mas_set(mas, index);
4789 mas_state_walk(mas);
4790 if (mas_is_start(mas))
4791 goto retry;
4792}
4793
4794/*
4795 * mas_next_entry() - Internal function to get the next entry.
4796 * @mas: The maple state
4797 * @limit: The maximum range start.
4798 *
4799 * Set the @mas->node to the next entry and the range_start to
4800 * the beginning value for the entry. Does not check beyond @limit.
4801 * Sets @mas->index and @mas->last to the limit if it is hit.
4802 * Restarts on dead nodes.
4803 *
4804 * Return: the next entry or %NULL.
4805 */
4806static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
4807{
4808 void *entry = NULL;
4809 struct maple_enode *prev_node;
4810 struct maple_node *node;
4811 unsigned char offset;
4812 unsigned long last;
4813 enum maple_type mt;
4814
4815 if (mas->index > limit) {
4816 mas->index = mas->last = limit;
4817 mas_pause(mas);
4818 return NULL;
4819 }
4820 last = mas->last;
4821retry:
4822 offset = mas->offset;
4823 prev_node = mas->node;
4824 node = mas_mn(mas);
4825 mt = mte_node_type(mas->node);
4826 mas->offset++;
4827 if (unlikely(mas->offset >= mt_slots[mt])) {
4828 mas->offset = mt_slots[mt] - 1;
4829 goto next_node;
4830 }
4831
4832 while (!mas_is_none(mas)) {
4833 entry = mas_next_nentry(mas, node, limit, mt);
4834 if (unlikely(ma_dead_node(node))) {
4835 mas_rewalk(mas, last);
4836 goto retry;
4837 }
4838
4839 if (likely(entry))
4840 return entry;
4841
4842 if (unlikely((mas->index > limit)))
4843 break;
4844
4845next_node:
4846 prev_node = mas->node;
4847 offset = mas->offset;
4848 if (unlikely(mas_next_node(mas, node, limit))) {
4849 mas_rewalk(mas, last);
4850 goto retry;
4851 }
4852 mas->offset = 0;
4853 node = mas_mn(mas);
4854 mt = mte_node_type(mas->node);
4855 }
4856
4857 mas->index = mas->last = limit;
4858 mas->offset = offset;
4859 mas->node = prev_node;
4860 return NULL;
4861}
4862
4863/*
4864 * mas_prev_nentry() - Get the previous node entry.
4865 * @mas: The maple state.
4866 * @limit: The lower limit to check for a value.
4867 *
4868 * Return: the entry, %NULL otherwise.
4869 */
4870static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
4871 unsigned long index)
4872{
4873 unsigned long pivot, min;
4874 unsigned char offset;
4875 struct maple_node *mn;
4876 enum maple_type mt;
4877 unsigned long *pivots;
4878 void __rcu **slots;
4879 void *entry;
4880
4881retry:
4882 if (!mas->offset)
4883 return NULL;
4884
4885 mn = mas_mn(mas);
4886 mt = mte_node_type(mas->node);
4887 offset = mas->offset - 1;
4888 if (offset >= mt_slots[mt])
4889 offset = mt_slots[mt] - 1;
4890
4891 slots = ma_slots(mn, mt);
4892 pivots = ma_pivots(mn, mt);
4893 if (unlikely(ma_dead_node(mn))) {
4894 mas_rewalk(mas, index);
4895 goto retry;
4896 }
4897
4898 if (offset == mt_pivots[mt])
4899 pivot = mas->max;
4900 else
4901 pivot = pivots[offset];
4902
4903 if (unlikely(ma_dead_node(mn))) {
4904 mas_rewalk(mas, index);
4905 goto retry;
4906 }
4907
4908 while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) ||
4909 !pivot))
4910 pivot = pivots[--offset];
4911
4912 min = mas_safe_min(mas, pivots, offset);
4913 entry = mas_slot(mas, slots, offset);
4914 if (unlikely(ma_dead_node(mn))) {
4915 mas_rewalk(mas, index);
4916 goto retry;
4917 }
4918
4919 if (likely(entry)) {
4920 mas->offset = offset;
4921 mas->last = pivot;
4922 mas->index = min;
4923 }
4924 return entry;
4925}
4926
4927static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
4928{
4929 void *entry;
4930
4931 if (mas->index < min) {
4932 mas->index = mas->last = min;
4933 mas->node = MAS_NONE;
4934 return NULL;
4935 }
4936retry:
4937 while (likely(!mas_is_none(mas))) {
4938 entry = mas_prev_nentry(mas, min, mas->index);
4939 if (unlikely(mas->last < min))
4940 goto not_found;
4941
4942 if (likely(entry))
4943 return entry;
4944
4945 if (unlikely(mas_prev_node(mas, min))) {
4946 mas_rewalk(mas, mas->index);
4947 goto retry;
4948 }
4949
4950 mas->offset++;
4951 }
4952
4953 mas->offset--;
4954not_found:
4955 mas->index = mas->last = min;
4956 return NULL;
4957}
4958
4959/*
4960 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the
4961 * highest gap address of a given size in a given node and descend.
4962 * @mas: The maple state
4963 * @size: The needed size.
4964 *
4965 * Return: True if found in a leaf, false otherwise.
4966 *
4967 */
4968static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
4969 unsigned long *gap_min, unsigned long *gap_max)
4970{
4971 enum maple_type type = mte_node_type(mas->node);
4972 struct maple_node *node = mas_mn(mas);
4973 unsigned long *pivots, *gaps;
4974 void __rcu **slots;
4975 unsigned long gap = 0;
4976 unsigned long max, min;
4977 unsigned char offset;
4978
4979 if (unlikely(mas_is_err(mas)))
4980 return true;
4981
4982 if (ma_is_dense(type)) {
4983 /* dense nodes. */
4984 mas->offset = (unsigned char)(mas->index - mas->min);
4985 return true;
4986 }
4987
4988 pivots = ma_pivots(node, type);
4989 slots = ma_slots(node, type);
4990 gaps = ma_gaps(node, type);
4991 offset = mas->offset;
4992 min = mas_safe_min(mas, pivots, offset);
4993 /* Skip out of bounds. */
4994 while (mas->last < min)
4995 min = mas_safe_min(mas, pivots, --offset);
4996
4997 max = mas_safe_pivot(mas, pivots, offset, type);
4998 while (mas->index <= max) {
4999 gap = 0;
5000 if (gaps)
5001 gap = gaps[offset];
5002 else if (!mas_slot(mas, slots, offset))
5003 gap = max - min + 1;
5004
5005 if (gap) {
5006 if ((size <= gap) && (size <= mas->last - min + 1))
5007 break;
5008
5009 if (!gaps) {
5010 /* Skip the next slot, it cannot be a gap. */
5011 if (offset < 2)
5012 goto ascend;
5013
5014 offset -= 2;
5015 max = pivots[offset];
5016 min = mas_safe_min(mas, pivots, offset);
5017 continue;
5018 }
5019 }
5020
5021 if (!offset)
5022 goto ascend;
5023
5024 offset--;
5025 max = min - 1;
5026 min = mas_safe_min(mas, pivots, offset);
5027 }
5028
5029 if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
5030 goto no_space;
5031
5032 if (unlikely(ma_is_leaf(type))) {
5033 mas->offset = offset;
5034 *gap_min = min;
5035 *gap_max = min + gap - 1;
5036 return true;
5037 }
5038
5039 /* descend, only happens under lock. */
5040 mas->node = mas_slot(mas, slots, offset);
5041 mas->min = min;
5042 mas->max = max;
5043 mas->offset = mas_data_end(mas);
5044 return false;
5045
5046ascend:
5047 if (!mte_is_root(mas->node))
5048 return false;
5049
5050no_space:
5051 mas_set_err(mas, -EBUSY);
5052 return false;
5053}
5054
5055static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
5056{
5057 enum maple_type type = mte_node_type(mas->node);
5058 unsigned long pivot, min, gap = 0;
5059 unsigned char offset, data_end;
5060 unsigned long *gaps, *pivots;
5061 void __rcu **slots;
5062 struct maple_node *node;
5063 bool found = false;
5064
5065 if (ma_is_dense(type)) {
5066 mas->offset = (unsigned char)(mas->index - mas->min);
5067 return true;
5068 }
5069
5070 node = mas_mn(mas);
5071 pivots = ma_pivots(node, type);
5072 slots = ma_slots(node, type);
5073 gaps = ma_gaps(node, type);
5074 offset = mas->offset;
5075 min = mas_safe_min(mas, pivots, offset);
5076 data_end = ma_data_end(node, type, pivots, mas->max);
5077 for (; offset <= data_end; offset++) {
5078 pivot = mas_logical_pivot(mas, pivots, offset, type);
5079
5080 /* Not within lower bounds */
5081 if (mas->index > pivot)
5082 goto next_slot;
5083
5084 if (gaps)
5085 gap = gaps[offset];
5086 else if (!mas_slot(mas, slots, offset))
5087 gap = min(pivot, mas->last) - max(mas->index, min) + 1;
5088 else
5089 goto next_slot;
5090
5091 if (gap >= size) {
5092 if (ma_is_leaf(type)) {
5093 found = true;
5094 goto done;
5095 }
5096 if (mas->index <= pivot) {
5097 mas->node = mas_slot(mas, slots, offset);
5098 mas->min = min;
5099 mas->max = pivot;
5100 offset = 0;
5101 break;
5102 }
5103 }
5104next_slot:
5105 min = pivot + 1;
5106 if (mas->last <= pivot) {
5107 mas_set_err(mas, -EBUSY);
5108 return true;
5109 }
5110 }
5111
5112 if (mte_is_root(mas->node))
5113 found = true;
5114done:
5115 mas->offset = offset;
5116 return found;
5117}
5118
5119/**
5120 * mas_walk() - Search for @mas->index in the tree.
5121 * @mas: The maple state.
5122 *
5123 * mas->index and mas->last will be set to the range if there is a value. If
5124 * mas->node is MAS_NONE, reset to MAS_START.
5125 *
5126 * Return: the entry at the location or %NULL.
5127 */
5128void *mas_walk(struct ma_state *mas)
5129{
5130 void *entry;
5131
5132retry:
5133 entry = mas_state_walk(mas);
5134 if (mas_is_start(mas))
5135 goto retry;
5136
5137 if (mas_is_ptr(mas)) {
5138 if (!mas->index) {
5139 mas->last = 0;
5140 } else {
5141 mas->index = 1;
5142 mas->last = ULONG_MAX;
5143 }
5144 return entry;
5145 }
5146
5147 if (mas_is_none(mas)) {
5148 mas->index = 0;
5149 mas->last = ULONG_MAX;
5150 }
5151
5152 return entry;
5153}
5154 EXPORT_SYMBOL_GPL(mas_walk);
5155
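/*
 * Usage sketch (illustrative only; "my_tree" and "my_index" are hypothetical
 * names, not part of this file):
 *
 *	MA_STATE(mas, &my_tree, my_index, my_index);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	rcu_read_unlock();
 *
 * On return, mas.index and mas.last describe the range holding the entry.
 */
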
5156static inline bool mas_rewind_node(struct ma_state *mas)
5157{
5158 unsigned char slot;
5159
5160 do {
5161 if (mte_is_root(mas->node)) {
5162 slot = mas->offset;
5163 if (!slot)
5164 return false;
5165 } else {
5166 mas_ascend(mas);
5167 slot = mas->offset;
5168 }
5169 } while (!slot);
5170
5171 mas->offset = --slot;
5172 return true;
5173}
5174
5175/*
5176 * mas_skip_node() - Internal function. Skip over a node.
5177 * @mas: The maple state.
5178 *
5179 * Return: true if there is another node, false otherwise.
5180 */
5181static inline bool mas_skip_node(struct ma_state *mas)
5182{
5183 if (mas_is_err(mas))
5184 return false;
5185
5186 do {
5187 if (mte_is_root(mas->node)) {
5188 if (mas->offset >= mas_data_end(mas)) {
5189 mas_set_err(mas, -EBUSY);
5190 return false;
5191 }
5192 } else {
5193 mas_ascend(mas);
5194 }
5195 } while (mas->offset >= mas_data_end(mas));
5196
5197 mas->offset++;
5198 return true;
5199}
5200
5201/*
5202 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of
5203 * @size
5204 * @mas: The maple state
5205 * @size: The size of the gap required
5206 *
5207 * Search between @mas->index and @mas->last for a gap of @size.
5208 */
5209static inline void mas_awalk(struct ma_state *mas, unsigned long size)
5210{
5211 struct maple_enode *last = NULL;
5212
5213 /*
5214 * There are 4 options:
5215 * go to child (descend)
5216 * go back to parent (ascend)
5217 * no gap found. (return, slot == MAPLE_NODE_SLOTS)
5218 * found the gap. (return, slot != MAPLE_NODE_SLOTS)
5219 */
5220 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
5221 if (last == mas->node)
5222 mas_skip_node(mas);
5223 else
5224 last = mas->node;
5225 }
5226}
5227
5228/*
5229 * mas_fill_gap() - Fill a located gap with @entry.
5230 * @mas: The maple state
5231 * @entry: The value to store
5232 * @slot: The offset into the node to store the @entry
5233 * @size: The size of the entry
5234 * @index: The start location
5235 */
5236static inline void mas_fill_gap(struct ma_state *mas, void *entry,
5237 unsigned char slot, unsigned long size, unsigned long *index)
5238{
5239 MA_WR_STATE(wr_mas, mas, entry);
5240 unsigned char pslot = mte_parent_slot(mas->node);
5241 struct maple_enode *mn = mas->node;
5242 unsigned long *pivots;
5243 enum maple_type ptype;
5244 /*
5245 * mas->index is the start address for the search
5246 * which may no longer be needed.
5247 * mas->last is the end address for the search
5248 */
5249
5250 *index = mas->index;
5251 mas->last = mas->index + size - 1;
5252
5253 /*
5254 * It is possible that using mas->max and mas->min to correctly
5255 * calculate the index and last will cause an issue in the gap
5256 * calculation, so fix the ma_state here
5257 */
5258 mas_ascend(mas);
5259 ptype = mte_node_type(mas->node);
5260 pivots = ma_pivots(mas_mn(mas), ptype);
5261 mas->max = mas_safe_pivot(mas, pivots, pslot, ptype);
5262 mas->min = mas_safe_min(mas, pivots, pslot);
5263 mas->node = mn;
5264 mas->offset = slot;
5265 mas_wr_store_entry(&wr_mas);
5266}
5267
5268/*
5269 * mas_sparse_area() - Internal function. Return upper or lower limit when
5270 * searching for a gap in an empty tree.
5271 * @mas: The maple state
5272 * @min: the minimum range
5273 * @max: The maximum range
5274 * @size: The size of the gap
5275 * @fwd: Searching forward or back
5276 */
5277static inline void mas_sparse_area(struct ma_state *mas, unsigned long min,
5278 unsigned long max, unsigned long size, bool fwd)
5279{
5280 unsigned long start = 0;
5281
5282 if (!unlikely(mas_is_none(mas)))
5283 start++;
5284 /* mas_is_ptr */
5285
5286 if (start < min)
5287 start = min;
5288
5289 if (fwd) {
5290 mas->index = start;
5291 mas->last = start + size - 1;
5292 return;
5293 }
5294
5295 mas->index = max;
5296}
5297
5298/*
5299 * mas_empty_area() - Get the lowest address within the range that is
5300 * sufficient for the size requested.
5301 * @mas: The maple state
5302 * @min: The lowest value of the range
5303 * @max: The highest value of the range
5304 * @size: The size needed
5305 */
5306int mas_empty_area(struct ma_state *mas, unsigned long min,
5307 unsigned long max, unsigned long size)
5308{
5309 unsigned char offset;
5310 unsigned long *pivots;
5311 enum maple_type mt;
5312
5313 if (min >= max)
5314 return -EINVAL;
5315
5316 if (mas_is_start(mas))
5317 mas_start(mas);
5318 else if (mas->offset >= 2)
5319 mas->offset -= 2;
5320 else if (!mas_skip_node(mas))
5321 return -EBUSY;
5322
5323 /* Empty set */
5324 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5325 mas_sparse_area(mas, min, max, size, true);
5326 return 0;
5327 }
5328
5329 /* The start of the window can only be within these values */
5330 mas->index = min;
5331 mas->last = max;
5332 mas_awalk(mas, size);
5333
5334 if (unlikely(mas_is_err(mas)))
5335 return xa_err(mas->node);
5336
5337 offset = mas->offset;
5338 if (unlikely(offset == MAPLE_NODE_SLOTS))
5339 return -EBUSY;
5340
5341 mt = mte_node_type(mas->node);
5342 pivots = ma_pivots(mas_mn(mas), mt);
5343 if (offset)
5344 mas->min = pivots[offset - 1] + 1;
5345
5346 if (offset < mt_pivots[mt])
5347 mas->max = pivots[offset];
5348
5349 if (mas->index < mas->min)
5350 mas->index = mas->min;
5351
5352 mas->last = mas->index + size - 1;
5353 return 0;
5354}
5355 EXPORT_SYMBOL_GPL(mas_empty_area);
5356
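/*
 * Usage sketch (illustrative only; the tree, entry and bounds below are
 * hypothetical): find the lowest 0x1000-wide gap in [0, 0xffffffff] and
 * claim it.  The caller is assumed to hold the tree lock.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *
 *	if (!mas_empty_area(&mas, 0, 0xffffffff, 0x1000))
 *		mas_store_gfp(&mas, my_entry, GFP_KERNEL);
 *
 * On success, mas.index is the start of the gap and mas.last the end of the
 * claimed range.
 */
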
5357/*
5358 * mas_empty_area_rev() - Get the highest address within the range that is
5359 * sufficient for the size requested.
5360 * @mas: The maple state
5361 * @min: The lowest value of the range
5362 * @max: The highest value of the range
5363 * @size: The size needed
5364 */
5365int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
5366 unsigned long max, unsigned long size)
5367{
5368 struct maple_enode *last = mas->node;
5369
5370 if (min >= max)
5371 return -EINVAL;
5372
5373 if (mas_is_start(mas)) {
5374 mas_start(mas);
5375 mas->offset = mas_data_end(mas);
5376 } else if (mas->offset >= 2) {
5377 mas->offset -= 2;
5378 } else if (!mas_rewind_node(mas)) {
5379 return -EBUSY;
5380 }
5381
5382 /* Empty set. */
5383 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5384 mas_sparse_area(mas, min, max, size, false);
5385 return 0;
5386 }
5387
5388 /* The start of the window can only be within these values. */
5389 mas->index = min;
5390 mas->last = max;
5391
5392 while (!mas_rev_awalk(mas, size, &min, &max)) {
5393 if (last == mas->node) {
5394 if (!mas_rewind_node(mas))
5395 return -EBUSY;
5396 } else {
5397 last = mas->node;
5398 }
5399 }
5400
5401 if (mas_is_err(mas))
5402 return xa_err(mas->node);
5403
5404 if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
5405 return -EBUSY;
5406
5407 /* Trim the upper limit to the max. */
5408 if (max <= mas->last)
5409 mas->last = max;
5410
5411 mas->index = mas->last - size + 1;
5412 return 0;
5413}
5414 EXPORT_SYMBOL_GPL(mas_empty_area_rev);
5415
5416static inline int mas_alloc(struct ma_state *mas, void *entry,
5417 unsigned long size, unsigned long *index)
5418{
5419 unsigned long min;
5420
5421 mas_start(mas);
5422 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5423 mas_root_expand(mas, entry);
5424 if (mas_is_err(mas))
5425 return xa_err(mas->node);
5426
5427 if (!mas->index)
5428 return mte_pivot(mas->node, 0);
5429 return mte_pivot(mas->node, 1);
5430 }
5431
5432 /* Must be walking a tree. */
5433 mas_awalk(mas, size);
5434 if (mas_is_err(mas))
5435 return xa_err(mas->node);
5436
5437 if (mas->offset == MAPLE_NODE_SLOTS)
5438 goto no_gap;
5439
5440 /*
5441 * At this point, mas->node points to the right node and we have an
5442 * offset that has a sufficient gap.
5443 */
5444 min = mas->min;
5445 if (mas->offset)
5446 min = mte_pivot(mas->node, mas->offset - 1) + 1;
5447
5448 if (mas->index < min)
5449 mas->index = min;
5450
5451 mas_fill_gap(mas, entry, mas->offset, size, index);
5452 return 0;
5453
5454no_gap:
5455 return -EBUSY;
5456}
5457
5458static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
5459 unsigned long max, void *entry,
5460 unsigned long size, unsigned long *index)
5461{
5462 int ret = 0;
5463
5464 ret = mas_empty_area_rev(mas, min, max, size);
5465 if (ret)
5466 return ret;
5467
5468 if (mas_is_err(mas))
5469 return xa_err(mas->node);
5470
5471 if (mas->offset == MAPLE_NODE_SLOTS)
5472 goto no_gap;
5473
5474 mas_fill_gap(mas, entry, mas->offset, size, index);
5475 return 0;
5476
5477no_gap:
5478 return -EBUSY;
5479}
5480
5481/*
5482 * mte_dead_leaves() - Mark all leaves of a node as dead.
5483 * @enode: The maple encoded node
5484 * @mt: The maple tree
5485 * @slots: Pointer to the slot array
5486 *
5487 * Must hold the write lock.
5488 *
5489 * Return: The number of leaves marked as dead.
5490 */
5491static inline
5492unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
5493 void __rcu **slots)
5494{
5495 struct maple_node *node;
5496 enum maple_type type;
5497 void *entry;
5498 int offset;
5499
5500 for (offset = 0; offset < mt_slot_count(enode); offset++) {
5501 entry = mt_slot(mt, slots, offset);
5502 type = mte_node_type(entry);
5503 node = mte_to_node(entry);
5504 /* Use both node and type to catch LE & BE metadata */
5505 if (!node || !type)
5506 break;
5507
5508 mte_set_node_dead(entry);
5509 node->type = type;
5510 rcu_assign_pointer(slots[offset], node);
5511 }
5512
5513 return offset;
5514}
5515
5516/**
5517 * mte_dead_walk() - Walk down a dead tree to just before the leaves
5518 * @enode: The maple encoded node
5519 * @offset: The starting offset
5520 *
5521 * Note: This can only be used from the RCU callback context.
5522 */
5523static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
5524 {
5525 struct maple_node *node, *next;
5526 void __rcu **slots = NULL;
5527
5528 next = mte_to_node(*enode);
5529 do {
5530 *enode = ma_enode_ptr(next);
5531 node = mte_to_node(*enode);
5532 slots = ma_slots(node, node->type);
5533 next = rcu_dereference_protected(slots[offset],
5534 lock_is_held(&rcu_callback_map));
5535 offset = 0;
5536 } while (!ma_is_leaf(next->type));
5537
5538 return slots;
5539}
5540
5541/**
5542 * mt_free_walk() - Walk & free a tree in the RCU callback context
5543 * @head: The RCU head that's within the node.
5544 *
5545 * Note: This can only be used from the RCU callback context.
5546 */
5547static void mt_free_walk(struct rcu_head *head)
5548{
5549 void __rcu **slots;
5550 struct maple_node *node, *start;
5551 struct maple_enode *enode;
5552 unsigned char offset;
5553 enum maple_type type;
5554
5555 node = container_of(head, struct maple_node, rcu);
5556
5557 if (ma_is_leaf(node->type))
5558 goto free_leaf;
5559
5560 start = node;
5561 enode = mt_mk_node(node, node->type);
5562 slots = mte_dead_walk(&enode, 0);
5563 node = mte_to_node(enode);
5564 do {
5565 mt_free_bulk(node->slot_len, slots);
5566 offset = node->parent_slot + 1;
5567 enode = node->piv_parent;
5568 if (mte_to_node(enode) == node)
5569 goto free_leaf;
5570
5571 type = mte_node_type(enode);
5572 slots = ma_slots(mte_to_node(enode), type);
5573 if ((offset < mt_slots[type]) &&
5574 rcu_dereference_protected(slots[offset],
5575 lock_is_held(&rcu_callback_map)))
5576 slots = mte_dead_walk(&enode, offset);
5577 node = mte_to_node(enode);
5578 } while ((node != start) || (node->slot_len < offset));
5579
5580 slots = ma_slots(node, node->type);
5581 mt_free_bulk(node->slot_len, slots);
5582
5583free_leaf:
5584 mt_free_rcu(&node->rcu);
5585}
5586
5587static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
5588 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
5589{
5590 struct maple_node *node;
5591 struct maple_enode *next = *enode;
5592 void __rcu **slots = NULL;
5593 enum maple_type type;
5594 unsigned char next_offset = 0;
5595
5596 do {
5597 *enode = next;
5598 node = mte_to_node(*enode);
5599 type = mte_node_type(*enode);
5600 slots = ma_slots(node, type);
5601 next = mt_slot_locked(mt, slots, next_offset);
5602 if ((mte_dead_node(next)))
5603 next = mt_slot_locked(mt, slots, ++next_offset);
5604
5605 mte_set_node_dead(*enode);
5606 node->type = type;
5607 node->piv_parent = prev;
5608 node->parent_slot = offset;
5609 offset = next_offset;
5610 next_offset = 0;
5611 prev = *enode;
5612 } while (!mte_is_leaf(next));
5613
5614 return slots;
5615}
5616
5617 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
5618 bool free)
5619{
5620 void __rcu **slots;
5621 struct maple_node *node = mte_to_node(enode);
5622 struct maple_enode *start;
5623
5624 if (mte_is_leaf(enode)) {
5625 node->type = mte_node_type(enode);
5626 goto free_leaf;
5627 }
5628
5629 start = enode;
5630 slots = mte_destroy_descend(&enode, mt, start, 0);
5631 node = mte_to_node(enode); // Updated in the above call.
5632 do {
5633 enum maple_type type;
5634 unsigned char offset;
5635 struct maple_enode *parent, *tmp;
5636
5637 node->slot_len = mte_dead_leaves(enode, mt, slots);
5638 if (free)
5639 mt_free_bulk(node->slot_len, slots);
5640 offset = node->parent_slot + 1;
5641 enode = node->piv_parent;
5642 if (mte_to_node(enode) == node)
5643 goto free_leaf;
5644
5645 type = mte_node_type(enode);
5646 slots = ma_slots(mte_to_node(enode), type);
5647 if (offset >= mt_slots[type])
5648 goto next;
5649
5650 tmp = mt_slot_locked(mt, slots, offset);
5651 if (mte_node_type(tmp) && mte_to_node(tmp)) {
5652 parent = enode;
5653 enode = tmp;
5654 slots = mte_destroy_descend(&enode, mt, parent, offset);
5655 }
5656next:
5657 node = mte_to_node(enode);
5658 } while (start != enode);
5659
5660 node = mte_to_node(enode);
5661 node->slot_len = mte_dead_leaves(enode, mt, slots);
5662 if (free)
5663 mt_free_bulk(node->slot_len, slots);
5664
5665free_leaf:
5666 if (free)
5667 mt_free_rcu(&node->rcu);
5668 else
5669 mt_clear_meta(mt, node, node->type);
5670}
5671
5672/*
5673 * mte_destroy_walk() - Free a tree or sub-tree.
5674 * @enode: the encoded maple node (maple_enode) to start
5675 * @mt: the tree to free - needed for node types.
5676 *
5677 * Must hold the write lock.
5678 */
5679static inline void mte_destroy_walk(struct maple_enode *enode,
5680 struct maple_tree *mt)
5681{
5682 struct maple_node *node = mte_to_node(enode);
5683
5684 if (mt_in_rcu(mt)) {
5685 mt_destroy_walk(enode, mt, false);
5686 call_rcu(&node->rcu, mt_free_walk);
5687 } else {
5688 mt_destroy_walk(enode, mt, true);
5689 }
5690}
5691
5692static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
5693{
5694 if (unlikely(mas_is_paused(wr_mas->mas)))
5695 mas_reset(wr_mas->mas);
5696
5697 if (!mas_is_start(wr_mas->mas)) {
5698 if (mas_is_none(wr_mas->mas)) {
5699 mas_reset(wr_mas->mas);
5700 } else {
5701 wr_mas->r_max = wr_mas->mas->max;
5702 wr_mas->type = mte_node_type(wr_mas->mas->node);
5703 if (mas_is_span_wr(wr_mas))
5704 mas_reset(wr_mas->mas);
5705 }
5706 }
5707}
5708
5709/* Interface */
5710
5711/**
5712 * mas_store() - Store an @entry.
5713 * @mas: The maple state.
5714 * @entry: The entry to store.
5715 *
5716 * The @mas->index and @mas->last are used to set the range for the @entry.
5717 * Note: The @mas should have pre-allocated entries to ensure there is memory to
5718 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details.
5719 *
5720 * Return: the first entry between mas->index and mas->last or %NULL.
5721 */
5722void *mas_store(struct ma_state *mas, void *entry)
5723{
5724 MA_WR_STATE(wr_mas, mas, entry);
5725
5726 trace_ma_write(__func__, mas, 0, entry);
5727#ifdef CONFIG_DEBUG_MAPLE_TREE
5728 if (mas->index > mas->last)
5729 pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry);
5730 MT_BUG_ON(mas->tree, mas->index > mas->last);
5731 if (mas->index > mas->last) {
5732 mas_set_err(mas, -EINVAL);
5733 return NULL;
5734 }
5735
5736#endif
5737
5738 /*
5739 * Storing is the same operation as insert with the added caveat that it
5740 * can overwrite entries. Although this seems simple enough, one may
5741 * want to examine what happens if a single store operation was to
5742 * overwrite multiple entries within a self-balancing B-Tree.
5743 */
5744 mas_wr_store_setup(&wr_mas);
5745 mas_wr_store_entry(&wr_mas);
5746 return wr_mas.content;
5747}
5748 EXPORT_SYMBOL_GPL(mas_store);
5749
5750/**
5751 * mas_store_gfp() - Store a value into the tree.
5752 * @mas: The maple state
5753 * @entry: The entry to store
5754 * @gfp: The GFP_FLAGS to use for allocations if necessary.
5755 *
5756 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5757 * be allocated.
5758 */
5759int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5760{
5761 MA_WR_STATE(wr_mas, mas, entry);
5762
5763 mas_wr_store_setup(&wr_mas);
5764 trace_ma_write(__func__, mas, 0, entry);
5765retry:
5766 mas_wr_store_entry(&wr_mas);
5767 if (unlikely(mas_nomem(mas, gfp)))
5768 goto retry;
5769
5770 if (unlikely(mas_is_err(mas)))
5771 return xa_err(mas->node);
5772
5773 return 0;
5774}
5775 EXPORT_SYMBOL_GPL(mas_store_gfp);
5776
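/*
 * Usage sketch (illustrative only; names are hypothetical): store an entry
 * over the range [10, 20], letting the function allocate nodes as needed.
 *
 *	MA_STATE(mas, &my_tree, 10, 20);
 *	int ret;
 *
 *	mas_lock(&mas);
 *	ret = mas_store_gfp(&mas, my_entry, GFP_KERNEL);
 *	mas_unlock(&mas);
 */
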
5777/**
5778 * mas_store_prealloc() - Store a value into the tree using memory
5779 * preallocated in the maple state.
5780 * @mas: The maple state
5781 * @entry: The entry to store.
5782 */
5783void mas_store_prealloc(struct ma_state *mas, void *entry)
5784{
5785 MA_WR_STATE(wr_mas, mas, entry);
5786
5787 mas_wr_store_setup(&wr_mas);
5788 trace_ma_write(__func__, mas, 0, entry);
5789 mas_wr_store_entry(&wr_mas);
5790 BUG_ON(mas_is_err(mas));
5791 mas_destroy(mas);
5792}
5793EXPORT_SYMBOL_GPL(mas_store_prealloc);
5794
5795/**
5796 * mas_preallocate() - Preallocate enough nodes for a store operation
5797 * @mas: The maple state
5798 * @gfp: The GFP_FLAGS to use for allocations.
5799 *
5800 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5801 */
5802int mas_preallocate(struct ma_state *mas, gfp_t gfp)
5803{
5804 int ret;
5805
5806 mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp);
5807 mas->mas_flags |= MA_STATE_PREALLOC;
5808 if (likely(!mas_is_err(mas)))
5809 return 0;
5810
5811 mas_set_alloc_req(mas, 0);
5812 ret = xa_err(mas->node);
5813 mas_reset(mas);
5814 mas_destroy(mas);
5815 mas_reset(mas);
5816 return ret;
5817}
5818
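/*
 * Usage sketch combining mas_preallocate() and mas_store_prealloc(),
 * assuming a caller-provided tree protected by an external, sleepable lock
 * already held by the caller (MT_FLAGS_LOCK_EXTERN), a range "first"-"last"
 * and an entry "ptr". mas_store_prealloc() calls mas_destroy() itself, so
 * the preallocated nodes need no separate cleanup on the success path:
 *
 *	MA_STATE(mas, &tree, first, last);
 *
 *	if (mas_preallocate(&mas, GFP_KERNEL))
 *		return -ENOMEM;
 *	mas_store_prealloc(&mas, ptr);
 */
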
5819/*
5820 * mas_destroy() - destroy a maple state.
5821 * @mas: The maple state
5822 *
5823 * Upon completion, check the left-most node and rebalance against the node to
5824 * the right if necessary. Frees any allocated nodes associated with this maple
5825 * state.
5826 */
5827void mas_destroy(struct ma_state *mas)
5828{
5829 struct maple_alloc *node;
5830 unsigned long total;
5831
5832 /*
5833 * When using mas_for_each() to insert an expected number of elements,
5834 * it is possible that the number inserted is less than the expected
5835 * number. To fix an invalid final node, a check is performed here to
5836 * rebalance the previous node with the final node.
5837 */
5838 if (mas->mas_flags & MA_STATE_REBALANCE) {
5839 unsigned char end;
5840
5841 if (mas_is_start(mas))
5842 mas_start(mas);
5843
5844 mtree_range_walk(mas);
5845 end = mas_data_end(mas) + 1;
5846 if (end < mt_min_slot_count(mas->node) - 1)
5847 mas_destroy_rebalance(mas, end);
5848
5849 mas->mas_flags &= ~MA_STATE_REBALANCE;
5850 }
5851 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
5852
5853 total = mas_allocated(mas);
5854 while (total) {
5855 node = mas->alloc;
5856 mas->alloc = node->slot[0];
5857 if (node->node_count > 1) {
5858 size_t count = node->node_count - 1;
5859
5860 mt_free_bulk(count, (void __rcu **)&node->slot[1]);
5861 total -= count;
5862 }
5863 kmem_cache_free(maple_node_cache, node);
5864 total--;
5865 }
5866
5867 mas->alloc = NULL;
5868}
5869EXPORT_SYMBOL_GPL(mas_destroy);
5870
5871/*
5872 * mas_expected_entries() - Set the expected number of entries that will be inserted.
5873 * @mas: The maple state
5874 * @nr_entries: The number of expected entries.
5875 *
5876 * This will attempt to pre-allocate enough nodes to store the expected number
5877 * of entries. The allocations will occur using the bulk allocator interface
5878 * for speed. Please call mas_destroy() on the @mas after inserting the entries
5879 * to ensure any unused nodes are freed.
5880 *
5881 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5882 */
5883int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
5884{
5885 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
5886 struct maple_enode *enode = mas->node;
5887 int nr_nodes;
5888 int ret;
5889
5890 /*
5891 * Sometimes it is necessary to duplicate a tree to a new tree, such as
5892 * forking a process and duplicating the VMAs from one tree to a new
5893 * tree. When such a situation arises, it is known that the new tree is
5894 * not going to be used until the entire tree is populated. For
5895 * performance reasons, it is best to use a bulk load with RCU disabled.
5896 * This allows for optimistic splitting that favours the left and reuse
5897 * of nodes during the operation.
5898 */
5899
5900 /* Optimize splitting for bulk insert in-order */
5901 mas->mas_flags |= MA_STATE_BULK;
5902
5903 /*
5904 * Avoid overflow, assume a gap between each entry and a trailing null.
5905 * If this is wrong, it just means allocation can happen during
5906 * insertion of entries.
5907 */
5908 nr_nodes = max(nr_entries, nr_entries * 2 + 1);
5909 if (!mt_is_alloc(mas->tree))
5910 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
5911
5912 /* Leaves; reduce slots to keep space for expansion */
5913 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
5914 /* Internal nodes */
5915 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
5916 /* Add working room for split (2 nodes) + new parents */
5917 mas_node_count(mas, nr_nodes + 3);
5918
5919 /* Detect if allocations run out */
5920 mas->mas_flags |= MA_STATE_PREALLOC;
5921
5922 if (!mas_is_err(mas))
5923 return 0;
5924
5925 ret = xa_err(mas->node);
5926 mas->node = enode;
5927 mas_destroy(mas);
5928 return ret;
5929
5930}
5931EXPORT_SYMBOL_GPL(mas_expected_entries);
5932
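/*
 * Bulk-insert sketch using mas_expected_entries(), assuming "count" entries
 * held in caller-provided arrays "start", "end" and "entries", inserted in
 * ascending order into a freshly created "new_tree":
 *
 *	MA_STATE(mas, &new_tree, 0, 0);
 *	unsigned long i;
 *
 *	mas_lock(&mas);
 *	if (!mas_expected_entries(&mas, count)) {
 *		for (i = 0; i < count; i++) {
 *			mas_set_range(&mas, start[i], end[i]);
 *			mas_store(&mas, entries[i]);
 *		}
 *	}
 *	mas_destroy(&mas);	frees any unused pre-allocated nodes
 *	mas_unlock(&mas);
 */
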
5933/**
5934 * mas_next() - Get the next entry.
5935 * @mas: The maple state
5936 * @max: The maximum index to check.
5937 *
5938 * Returns the next entry after @mas->index.
5939 * Must hold rcu_read_lock or the write lock.
5940 * Can return the zero entry.
5941 *
5942 * Return: The next entry or %NULL
5943 */
5944void *mas_next(struct ma_state *mas, unsigned long max)
5945{
5946 if (mas_is_none(mas) || mas_is_paused(mas))
5947 mas->node = MAS_START;
5948
5949 if (mas_is_start(mas))
5950 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5951
5952 if (mas_is_ptr(mas)) {
5953 if (!mas->index) {
5954 mas->index = 1;
5955 mas->last = ULONG_MAX;
5956 }
5957 return NULL;
5958 }
5959
5960 if (mas->last == ULONG_MAX)
5961 return NULL;
5962
5963 /* Retries on dead nodes handled by mas_next_entry */
5964 return mas_next_entry(mas, max);
5965}
5966EXPORT_SYMBOL_GPL(mas_next);
5967
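/*
 * Iteration sketch with mas_next(), assuming a caller-provided tree "tree";
 * each call returns the next entry after the current position, up to "max",
 * so the loop below visits every entry strictly after "index":
 *
 *	MA_STATE(mas, &tree, index, index);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_next(&mas, max)) != NULL) {
 *		...
 *	}
 *	rcu_read_unlock();
 */
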
5968/**
5969 * mt_next() - get the next value in the maple tree
5970 * @mt: The maple tree
5971 * @index: The start index
5972 * @max: The maximum index to check
5973 *
5974 * Return: The entry at @index or higher, or %NULL if nothing is found.
5975 */
5976void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5977{
5978 void *entry = NULL;
5979 MA_STATE(mas, mt, index, index);
5980
5981 rcu_read_lock();
5982 entry = mas_next(&mas, max);
5983 rcu_read_unlock();
5984 return entry;
5985}
5986EXPORT_SYMBOL_GPL(mt_next);
5987
5988/**
5989 * mas_prev() - Get the previous entry
5990 * @mas: The maple state
5991 * @min: The minimum value to check.
5992 *
5993 * Must hold rcu_read_lock or the write lock.
5994 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on
5995 * non-searchable nodes.
5996 *
5997 * Return: the previous value or %NULL.
5998 */
5999void *mas_prev(struct ma_state *mas, unsigned long min)
6000{
6001 if (!mas->index) {
6002 /* Nothing comes before 0 */
6003 mas->last = 0;
6004 mas->node = MAS_NONE;
6005 return NULL;
6006 }
6007
6008 if (unlikely(mas_is_ptr(mas)))
6009 return NULL;
6010
6011 if (mas_is_none(mas) || mas_is_paused(mas))
6012 mas->node = MAS_START;
6013
6014 if (mas_is_start(mas)) {
6015 mas_walk(mas);
6016 if (!mas->index)
6017 return NULL;
6018 }
6019
6020 if (mas_is_ptr(mas)) {
6021 if (!mas->index) {
6022 mas->last = 0;
6023 return NULL;
6024 }
6025
6026 mas->index = mas->last = 0;
6027 return mas_root_locked(mas);
6028 }
6029 return mas_prev_entry(mas, min);
6030}
6031EXPORT_SYMBOL_GPL(mas_prev);
6032
6033/**
6034 * mt_prev() - get the previous value in the maple tree
6035 * @mt: The maple tree
6036 * @index: The start index
6037 * @min: The minimum index to check
6038 *
6039 * Return: The entry at @index or lower, or %NULL if nothing is found.
6040 */
6041void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
6042{
6043 void *entry = NULL;
6044 MA_STATE(mas, mt, index, index);
6045
6046 rcu_read_lock();
6047 entry = mas_prev(&mas, min);
6048 rcu_read_unlock();
6049 return entry;
6050}
6051EXPORT_SYMBOL_GPL(mt_prev);
6052
6053/**
6054 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
6055 * @mas: The maple state to pause
6056 *
6057 * Some users need to pause a walk and drop the lock they're holding in
6058 * order to yield to a higher priority thread or carry out an operation
6059 * on an entry. Those users should call this function before they drop
6060 * the lock. It resets the @mas to be suitable for the next iteration
6061 * of the loop after the user has reacquired the lock. If most entries
6062 * found during a walk require you to call mas_pause(), the mt_for_each()
6063 * iterator may be more appropriate.
6064 *
6065 */
6066void mas_pause(struct ma_state *mas)
6067{
6068 mas->node = MAS_PAUSE;
6069}
6070EXPORT_SYMBOL_GPL(mas_pause);
6071
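/*
 * Lock-yield sketch with mas_pause(), assuming a caller-provided tree
 * "tree"; the state is paused before the lock is dropped, and the walk
 * resumes correctly once the lock has been reacquired:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	mas_lock(&mas);
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		if (need_resched()) {
 *			mas_pause(&mas);
 *			mas_unlock(&mas);
 *			cond_resched();
 *			mas_lock(&mas);
 *		}
 *		...
 *	}
 *	mas_unlock(&mas);
 */
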
6072/**
6073 * mas_find() - On the first call, find the entry at or after mas->index up to
6074 * %max. Otherwise, find the entry after mas->index.
6075 * @mas: The maple state
6076 * @max: The maximum value to check.
6077 *
6078 * Must hold rcu_read_lock or the write lock.
6079 * If an entry exists, last and index are updated accordingly.
6080 * May set @mas->node to MAS_NONE.
6081 *
6082 * Return: The entry or %NULL.
6083 */
6084void *mas_find(struct ma_state *mas, unsigned long max)
6085{
6086 if (unlikely(mas_is_paused(mas))) {
6087 if (unlikely(mas->last == ULONG_MAX)) {
6088 mas->node = MAS_NONE;
6089 return NULL;
6090 }
6091 mas->node = MAS_START;
6092 mas->index = ++mas->last;
6093 }
6094
6095 if (unlikely(mas_is_none(mas)))
6096 mas->node = MAS_START;
6097
6098 if (unlikely(mas_is_start(mas))) {
6099 /* First run or continue */
6100 void *entry;
6101
6102 if (mas->index > max)
6103 return NULL;
6104
6105 entry = mas_walk(mas);
6106 if (entry)
6107 return entry;
6108 }
6109
6110 if (unlikely(!mas_searchable(mas)))
6111 return NULL;
6112
6113 /* Retries on dead nodes handled by mas_next_entry */
6114 return mas_next_entry(mas, max);
6115}
6116EXPORT_SYMBOL_GPL(mas_find);
6117
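/*
 * Read-side iteration sketch: mas_for_each() repeatedly calls mas_find(),
 * so the loop below visits every entry from index 0 up to ULONG_MAX.
 * Assumes a caller-provided tree "tree":
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		...
 *	}
 *	rcu_read_unlock();
 */
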
6118/**
6119 * mas_find_rev() - On the first call, find the first non-null entry at or below
6120 * mas->index down to %min. Otherwise find the first non-null entry below
6121 * mas->index down to %min.
6122 * @mas: The maple state
6123 * @min: The minimum value to check.
6124 *
6125 * Must hold rcu_read_lock or the write lock.
6126 * If an entry exists, last and index are updated accordingly.
6127 * May set @mas->node to MAS_NONE.
6128 *
6129 * Return: The entry or %NULL.
6130 */
6131void *mas_find_rev(struct ma_state *mas, unsigned long min)
6132{
6133 if (unlikely(mas_is_paused(mas))) {
6134 if (unlikely(mas->last == ULONG_MAX)) {
6135 mas->node = MAS_NONE;
6136 return NULL;
6137 }
6138 mas->node = MAS_START;
6139 mas->last = --mas->index;
6140 }
6141
6142 if (unlikely(mas_is_start(mas))) {
6143 /* First run or continue */
6144 void *entry;
6145
6146 if (mas->index < min)
6147 return NULL;
6148
6149 entry = mas_walk(mas);
6150 if (entry)
6151 return entry;
6152 }
6153
6154 if (unlikely(!mas_searchable(mas)))
6155 return NULL;
6156
6157 if (mas->index < min)
6158 return NULL;
6159
6160 /* Retries on dead nodes handled by mas_prev_entry */
6161 return mas_prev_entry(mas, min);
6162}
6163EXPORT_SYMBOL_GPL(mas_find_rev);
6164
6165/**
6166 * mas_erase() - Find the range in which index resides and erase the entire
6167 * range.
6168 * @mas: The maple state
6169 *
6170 * Must hold the write lock.
6171 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
6172 * erases that range.
6173 *
6174 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
6175 */
6176void *mas_erase(struct ma_state *mas)
6177{
6178 void *entry;
6179 MA_WR_STATE(wr_mas, mas, NULL);
6180
6181 if (mas_is_none(mas) || mas_is_paused(mas))
6182 mas->node = MAS_START;
6183
6184 /* Retry unnecessary when holding the write lock. */
6185 entry = mas_state_walk(mas);
6186 if (!entry)
6187 return NULL;
6188
6189write_retry:
6190 /* Must reset to ensure spanning writes of last slot are detected */
6191 mas_reset(mas);
6192 mas_wr_store_setup(&wr_mas);
6193 mas_wr_store_entry(&wr_mas);
6194 if (mas_nomem(mas, GFP_KERNEL))
6195 goto write_retry;
6196
6197 return entry;
6198}
6199EXPORT_SYMBOL_GPL(mas_erase);
6200
6201/**
6202 * mas_nomem() - Check if there was an error allocating and do the allocation
6203 * if necessary. If there are allocations, then free them.
6204 * @mas: The maple state
6205 * @gfp: The GFP_FLAGS to use for allocations
6206 * Return: true on allocation, false otherwise.
6207 */
6208bool mas_nomem(struct ma_state *mas, gfp_t gfp)
6209 __must_hold(mas->tree->lock)
6210{
6211 if (likely(mas->node != MA_ERROR(-ENOMEM))) {
6212 mas_destroy(mas);
6213 return false;
6214 }
6215
6216 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
6217 mtree_unlock(mas->tree);
6218 mas_alloc_nodes(mas, gfp);
6219 mtree_lock(mas->tree);
6220 } else {
6221 mas_alloc_nodes(mas, gfp);
6222 }
6223
6224 if (!mas_allocated(mas))
6225 return false;
6226
6227 mas->node = MAS_START;
6228 return true;
6229}
6230
6231void __init maple_tree_init(void)
6232{
6233 maple_node_cache = kmem_cache_create("maple_node",
6234 sizeof(struct maple_node), sizeof(struct maple_node),
6235 SLAB_PANIC, NULL);
6236}
6237
6238/**
6239 * mtree_load() - Load a value stored in a maple tree
6240 * @mt: The maple tree
6241 * @index: The index to load
6242 *
6243 * Return: the entry or %NULL
6244 */
6245void *mtree_load(struct maple_tree *mt, unsigned long index)
6246{
6247 MA_STATE(mas, mt, index, index);
6248 void *entry;
6249
6250 trace_ma_read(__func__, &mas);
6251 rcu_read_lock();
6252retry:
6253 entry = mas_start(&mas);
6254 if (unlikely(mas_is_none(&mas)))
6255 goto unlock;
6256
6257 if (unlikely(mas_is_ptr(&mas))) {
6258 if (index)
6259 entry = NULL;
6260
6261 goto unlock;
6262 }
6263
6264 entry = mtree_lookup_walk(&mas);
6265 if (!entry && unlikely(mas_is_start(&mas)))
6266 goto retry;
6267unlock:
6268 rcu_read_unlock();
6269 if (xa_is_zero(entry))
6270 return NULL;
6271
6272 return entry;
6273}
6274EXPORT_SYMBOL(mtree_load);
6275
6276/**
6277 * mtree_store_range() - Store an entry at a given range.
6278 * @mt: The maple tree
6279 * @index: The start of the range
6280 * @last: The end of the range
6281 * @entry: The entry to store
6282 * @gfp: The GFP_FLAGS to use for allocations
6283 *
6284 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6285 * be allocated.
6286 */
6287int mtree_store_range(struct maple_tree *mt, unsigned long index,
6288 unsigned long last, void *entry, gfp_t gfp)
6289{
6290 MA_STATE(mas, mt, index, last);
6291 MA_WR_STATE(wr_mas, &mas, entry);
6292
6293 trace_ma_write(__func__, &mas, 0, entry);
6294 if (WARN_ON_ONCE(xa_is_advanced(entry)))
6295 return -EINVAL;
6296
6297 if (index > last)
6298 return -EINVAL;
6299
6300 mtree_lock(mt);
6301retry:
6302 mas_wr_store_entry(&wr_mas);
6303 if (mas_nomem(&mas, gfp))
6304 goto retry;
6305
6306 mtree_unlock(mt);
6307 if (mas_is_err(&mas))
6308 return xa_err(mas.node);
6309
6310 return 0;
6311}
6312EXPORT_SYMBOL(mtree_store_range);
6313
6314/**
6315 * mtree_store() - Store an entry at a given index.
6316 * @mt: The maple tree
6317 * @index: The index to store the value
6318 * @entry: The entry to store
6319 * @gfp: The GFP_FLAGS to use for allocations
6320 *
6321 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6322 * be allocated.
6323 */
6324int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
6325 gfp_t gfp)
6326{
6327 return mtree_store_range(mt, index, index, entry, gfp);
6328}
6329EXPORT_SYMBOL(mtree_store);
6330
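/*
 * Basic-API sketch: the mtree_*() functions take and release the tree lock
 * themselves. The tree name, range and entry below are made up for the
 * example:
 *
 *	DEFINE_MTREE(my_tree);
 *	void *entry;
 *
 *	mtree_store_range(&my_tree, 5, 15, ptr, GFP_KERNEL);
 *	entry = mtree_load(&my_tree, 10);	returns ptr
 *	mtree_erase(&my_tree, 10);		clears the whole 5-15 range
 *	mtree_destroy(&my_tree);
 */
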
6331/**
6332 * mtree_insert_range() - Insert an entry at a given range if there is no value.
6333 * @mt: The maple tree
6334 * @first: The start of the range
6335 * @last: The end of the range
6336 * @entry: The entry to store
6337 * @gfp: The GFP_FLAGS to use for allocations.
6338 *
6339 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6340 * request, -ENOMEM if memory could not be allocated.
6341 */
6342int mtree_insert_range(struct maple_tree *mt, unsigned long first,
6343 unsigned long last, void *entry, gfp_t gfp)
6344{
6345 MA_STATE(ms, mt, first, last);
6346
6347 if (WARN_ON_ONCE(xa_is_advanced(entry)))
6348 return -EINVAL;
6349
6350 if (first > last)
6351 return -EINVAL;
6352
6353 mtree_lock(mt);
6354retry:
6355 mas_insert(&ms, entry);
6356 if (mas_nomem(&ms, gfp))
6357 goto retry;
6358
6359 mtree_unlock(mt);
6360 if (mas_is_err(&ms))
6361 return xa_err(ms.node);
6362
6363 return 0;
6364}
6365EXPORT_SYMBOL(mtree_insert_range);
6366
6367/**
6368 * mtree_insert() - Insert an entry at a given index if there is no value.
6369 * @mt: The maple tree
6370 * @index : The index to store the value
6371 * @entry: The entry to store
6372 * @gfp: The GFP_FLAGS to use for allocations.
6373 *
6374 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6375 * request, -ENOMEM if memory could not be allocated.
6376 */
6377int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6378 gfp_t gfp)
6379{
6380 return mtree_insert_range(mt, index, index, entry, gfp);
6381}
6382EXPORT_SYMBOL(mtree_insert);
6383
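/*
 * Insert-vs-store sketch: unlike mtree_store(), mtree_insert() fails when
 * the index is already occupied. Names are made up for the example:
 *
 *	DEFINE_MTREE(my_tree);
 *	int ret;
 *
 *	ret = mtree_insert(&my_tree, 7, ptr, GFP_KERNEL);	returns 0
 *	ret = mtree_insert(&my_tree, 7, other, GFP_KERNEL);	returns -EEXIST
 */
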
6384int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6385 void *entry, unsigned long size, unsigned long min,
6386 unsigned long max, gfp_t gfp)
6387{
6388 int ret = 0;
6389
6390 MA_STATE(mas, mt, min, max - size);
6391 if (!mt_is_alloc(mt))
6392 return -EINVAL;
6393
6394 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6395 return -EINVAL;
6396
6397 if (min > max)
6398 return -EINVAL;
6399
6400 if (max < size)
6401 return -EINVAL;
6402
6403 if (!size)
6404 return -EINVAL;
6405
6406 mtree_lock(mt);
6407retry:
6408 mas.offset = 0;
6409 mas.index = min;
6410 mas.last = max - size;
6411 ret = mas_alloc(&mas, entry, size, startp);
6412 if (mas_nomem(&mas, gfp))
6413 goto retry;
6414
6415 mtree_unlock(mt);
6416 return ret;
6417}
6418EXPORT_SYMBOL(mtree_alloc_range);
6419
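/*
 * Allocation sketch: mtree_alloc_range() only works on a tree created with
 * MT_FLAGS_ALLOC_RANGE and searches for a free area of the requested size
 * within the given bounds. Names and bounds are made up for the example:
 *
 *	struct maple_tree alloc_tree;
 *	unsigned long start;
 *	int ret;
 *
 *	mt_init_flags(&alloc_tree, MT_FLAGS_ALLOC_RANGE);
 *	ret = mtree_alloc_range(&alloc_tree, &start, ptr, 0x1000, 0, 0xffff,
 *				GFP_KERNEL);
 *	on success, "start" holds the base of the reserved range
 */
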
6420int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6421 void *entry, unsigned long size, unsigned long min,
6422 unsigned long max, gfp_t gfp)
6423{
6424 int ret = 0;
6425
6426 MA_STATE(mas, mt, min, max - size);
6427 if (!mt_is_alloc(mt))
6428 return -EINVAL;
6429
6430 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6431 return -EINVAL;
6432
6433 if (min >= max)
6434 return -EINVAL;
6435
6436 if (max < size - 1)
6437 return -EINVAL;
6438
6439 if (!size)
6440 return -EINVAL;
6441
6442 mtree_lock(mt);
6443retry:
6444 ret = mas_rev_alloc(&mas, min, max, entry, size, startp);
6445 if (mas_nomem(&mas, gfp))
6446 goto retry;
6447
6448 mtree_unlock(mt);
6449 return ret;
6450}
6451EXPORT_SYMBOL(mtree_alloc_rrange);
6452
6453/**
6454 * mtree_erase() - Find an index and erase the entire range.
6455 * @mt: The maple tree
6456 * @index: The index to erase
6457 *
6458 * Erasing is the same as a walk to an entry then a store of a NULL to that
6459 * ENTIRE range. In fact, it is implemented as such using the advanced API.
6460 *
6461 * Return: The entry stored at the @index or %NULL
6462 */
6463void *mtree_erase(struct maple_tree *mt, unsigned long index)
6464{
6465 void *entry = NULL;
6466
6467 MA_STATE(mas, mt, index, index);
6468 trace_ma_op(__func__, &mas);
6469
6470 mtree_lock(mt);
6471 entry = mas_erase(&mas);
6472 mtree_unlock(mt);
6473
6474 return entry;
6475}
6476EXPORT_SYMBOL(mtree_erase);
6477
6478/**
6479 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6480 * @mt: The maple tree
6481 *
6482 * Note: Does not handle locking.
6483 */
6484void __mt_destroy(struct maple_tree *mt)
6485{
6486 void *root = mt_root_locked(mt);
6487
6488 rcu_assign_pointer(mt->ma_root, NULL);
6489 if (xa_is_node(root))
6490 mte_destroy_walk(root, mt);
6491
6492 mt->ma_flags = 0;
6493}
6494EXPORT_SYMBOL_GPL(__mt_destroy);
6495
6496/**
6497 * mtree_destroy() - Destroy a maple tree
6498 * @mt: The maple tree
6499 *
6500 * Frees all resources used by the tree. Handles locking.
6501 */
6502void mtree_destroy(struct maple_tree *mt)
6503{
6504 mtree_lock(mt);
6505 __mt_destroy(mt);
6506 mtree_unlock(mt);
6507}
6508EXPORT_SYMBOL(mtree_destroy);
6509
6510/**
6511 * mt_find() - Search from the start up until an entry is found.
6512 * @mt: The maple tree
6513 * @index: Pointer which contains the start location of the search
6514 * @max: The maximum value to check
6515 *
6516 * Handles locking. @index will be incremented to one beyond the range.
6517 *
6518 * Return: The entry at or after the @index or %NULL
6519 */
6520void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6521{
6522 MA_STATE(mas, mt, *index, *index);
6523 void *entry;
6524#ifdef CONFIG_DEBUG_MAPLE_TREE
6525 unsigned long copy = *index;
6526#endif
6527
6528 trace_ma_read(__func__, &mas);
6529
6530 if ((*index) > max)
6531 return NULL;
6532
6533 rcu_read_lock();
6534retry:
6535 entry = mas_state_walk(&mas);
6536 if (mas_is_start(&mas))
6537 goto retry;
6538
6539 if (unlikely(xa_is_zero(entry)))
6540 entry = NULL;
6541
6542 if (entry)
6543 goto unlock;
6544
6545 while (mas_searchable(&mas) && (mas.index < max)) {
6546 entry = mas_next_entry(&mas, max);
6547 if (likely(entry && !xa_is_zero(entry)))
6548 break;
6549 }
6550
6551 if (unlikely(xa_is_zero(entry)))
6552 entry = NULL;
6553unlock:
6554 rcu_read_unlock();
6555 if (likely(entry)) {
6556 *index = mas.last + 1;
6557#ifdef CONFIG_DEBUG_MAPLE_TREE
6558 if ((*index) && (*index) <= copy)
6559 pr_err("index not increased! %lx <= %lx\n",
6560 *index, copy);
6561 MT_BUG_ON(mt, (*index) && ((*index) <= copy));
6562#endif
6563 }
6564
6565 return entry;
6566}
6567EXPORT_SYMBOL(mt_find);
6568
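/*
 * Scan sketch: mt_for_each() is built on mt_find()/mt_find_after() and
 * advances the index past each returned range. Assumes a populated tree
 * "my_tree":
 *
 *	void *entry;
 *	unsigned long index = 0;
 *
 *	mt_for_each(&my_tree, entry, index, ULONG_MAX) {
 *		...
 *	}
 */
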
6569/**
6570 * mt_find_after() - Search from the start up until an entry is found.
6571 * @mt: The maple tree
6572 * @index: Pointer which contains the start location of the search
6573 * @max: The maximum value to check
6574 *
6575 * Handles locking, detects wrapping on index == 0
6576 *
6577 * Return: The entry at or after the @index or %NULL
6578 */
6579void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6580 unsigned long max)
6581{
6582 if (!(*index))
6583 return NULL;
6584
6585 return mt_find(mt, index, max);
6586}
6587EXPORT_SYMBOL(mt_find_after);
6588
6589#ifdef CONFIG_DEBUG_MAPLE_TREE
6590atomic_t maple_tree_tests_run;
6591EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6592atomic_t maple_tree_tests_passed;
6593EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6594
6595#ifndef __KERNEL__
6596extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6597void mt_set_non_kernel(unsigned int val)
6598{
6599 kmem_cache_set_non_kernel(maple_node_cache, val);
6600}
6601
6602extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
6603unsigned long mt_get_alloc_size(void)
6604{
6605 return kmem_cache_get_alloc(maple_node_cache);
6606}
6607
6608extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
6609void mt_zero_nr_tallocated(void)
6610{
6611 kmem_cache_zero_nr_tallocated(maple_node_cache);
6612}
6613
6614extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
6615unsigned int mt_nr_tallocated(void)
6616{
6617 return kmem_cache_nr_tallocated(maple_node_cache);
6618}
6619
6620extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
6621unsigned int mt_nr_allocated(void)
6622{
6623 return kmem_cache_nr_allocated(maple_node_cache);
6624}
6625
6626/*
6627 * mas_dead_node() - Check if the maple state is pointing to a dead node.
6628 * @mas: The maple state
6629 * @index: The index to restore in @mas.
6630 *
6631 * Used in test code.
6632 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise.
6633 */
6634static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
6635{
6636 if (unlikely(!mas_searchable(mas) || mas_is_start(mas)))
6637 return 0;
6638
6639 if (likely(!mte_dead_node(mas->node)))
6640 return 0;
6641
6642 mas_rewalk(mas, index);
6643 return 1;
6644}
6645
6646void mt_cache_shrink(void)
6647{
6648}
6649#else
6650/*
6651 * mt_cache_shrink() - For testing, don't use this.
6652 *
6653 * Certain testcases can trigger an OOM when combined with other memory
6654 * debugging configuration options. This function is used to reduce the
6655 * possibility of an out of memory event due to kmem_cache objects remaining
6656 * around for longer than usual.
6657 */
6658void mt_cache_shrink(void)
6659{
6660 kmem_cache_shrink(maple_node_cache);
6661
6662}
6663EXPORT_SYMBOL_GPL(mt_cache_shrink);
6664
6665#endif /* not defined __KERNEL__ */
6666/*
6667 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
6668 * @mas: The maple state
6669 * @offset: The offset into the slot array to fetch.
6670 *
6671 * Return: The entry stored at @offset.
6672 */
6673static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
6674 unsigned char offset)
6675{
6676 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
6677 offset);
6678}
6679
6680
6681/*
6682 * mas_first_entry() - Go to the first leaf and find the first entry.
6683 * @mas: the maple state.
6684 * @mn: the node to start from (the node of @mas).
6685 * @limit: the maximum index to check.
6686 * @mt: the node type of @mn.
6687 *
6688 * Sets mas->offset to the offset of the entry, mas->index to the range minimum.
6689 * Return: The first entry or %NULL; sets @mas->node to MAS_NONE if none is found.
6690 */
6691static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
6692 unsigned long limit, enum maple_type mt)
6693
6694{
6695 unsigned long max;
6696 unsigned long *pivots;
6697 void __rcu **slots;
6698 void *entry = NULL;
6699
6700 mas->index = mas->min;
6701 if (mas->index > limit)
6702 goto none;
6703
6704 max = mas->max;
6705 mas->offset = 0;
6706 while (likely(!ma_is_leaf(mt))) {
6707 MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6708 slots = ma_slots(mn, mt);
6709 entry = mas_slot(mas, slots, 0);
6710 pivots = ma_pivots(mn, mt);
6711 if (unlikely(ma_dead_node(mn)))
6712 return NULL;
6713 max = pivots[0];
6714 mas->node = entry;
6715 mn = mas_mn(mas);
6716 mt = mte_node_type(mas->node);
6717 }
6718 MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6719
6720 mas->max = max;
6721 slots = ma_slots(mn, mt);
6722 entry = mas_slot(mas, slots, 0);
6723 if (unlikely(ma_dead_node(mn)))
6724 return NULL;
6725
6726 /* Slot 0 or 1 must be set */
6727 if (mas->index > limit)
6728 goto none;
6729
6730 if (likely(entry))
6731 return entry;
6732
6733 mas->offset = 1;
6734 entry = mas_slot(mas, slots, 1);
6735 pivots = ma_pivots(mn, mt);
6736 if (unlikely(ma_dead_node(mn)))
6737 return NULL;
6738
6739 mas->index = pivots[0] + 1;
6740 if (mas->index > limit)
6741 goto none;
6742
6743 if (likely(entry))
6744 return entry;
6745
6746none:
6747 if (likely(!ma_dead_node(mn)))
6748 mas->node = MAS_NONE;
6749 return NULL;
6750}
6751
6752/* Depth first search, post-order */
6753static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
6754{
6755
6756 struct maple_enode *p = MAS_NONE, *mn = mas->node;
6757 unsigned long p_min, p_max;
6758
6759 mas_next_node(mas, mas_mn(mas), max);
6760 if (!mas_is_none(mas))
6761 return;
6762
6763 if (mte_is_root(mn))
6764 return;
6765
6766 mas->node = mn;
6767 mas_ascend(mas);
6768 while (mas->node != MAS_NONE) {
6769 p = mas->node;
6770 p_min = mas->min;
6771 p_max = mas->max;
6772 mas_prev_node(mas, 0);
6773 }
6774
6775 if (p == MAS_NONE)
6776 return;
6777
6778 mas->node = p;
6779 mas->max = p_max;
6780 mas->min = p_min;
6781}
6782
6783/* Tree validations */
6784static void mt_dump_node(const struct maple_tree *mt, void *entry,
6785 unsigned long min, unsigned long max, unsigned int depth);
6786static void mt_dump_range(unsigned long min, unsigned long max,
6787 unsigned int depth)
6788{
6789 static const char spaces[] = " ";
6790
6791 if (min == max)
6792 pr_info("%.*s%lu: ", depth * 2, spaces, min);
6793 else
6794 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
6795}
6796
6797static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
6798 unsigned int depth)
6799{
6800 mt_dump_range(min, max, depth);
6801
6802 if (xa_is_value(entry))
6803 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
6804 xa_to_value(entry), entry);
6805 else if (xa_is_zero(entry))
6806 pr_cont("zero (%ld)\n", xa_to_internal(entry));
6807 else if (mt_is_reserved(entry))
6808 pr_cont("UNKNOWN ENTRY (%p)\n", entry);
6809 else
6810 pr_cont("%p\n", entry);
6811}
6812
6813static void mt_dump_range64(const struct maple_tree *mt, void *entry,
6814 unsigned long min, unsigned long max, unsigned int depth)
6815{
6816 struct maple_range_64 *node = &mte_to_node(entry)->mr64;
6817 bool leaf = mte_is_leaf(entry);
6818 unsigned long first = min;
6819 int i;
6820
6821 pr_cont(" contents: ");
6822 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++)
6823 pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6824 pr_cont("%p\n", node->slot[i]);
6825 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
6826 unsigned long last = max;
6827
6828 if (i < (MAPLE_RANGE64_SLOTS - 1))
6829 last = node->pivot[i];
6830 else if (!node->slot[i] && max != mt_node_max(entry))
6831 break;
6832 if (last == 0 && i > 0)
6833 break;
6834 if (leaf)
6835 mt_dump_entry(mt_slot(mt, node->slot, i),
6836 first, last, depth + 1);
6837 else if (node->slot[i])
6838 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6839 first, last, depth + 1);
6840
6841 if (last == max)
6842 break;
6843 if (last > max) {
6844 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6845 node, last, max, i);
6846 break;
6847 }
6848 first = last + 1;
6849 }
6850}
6851
6852static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
6853 unsigned long min, unsigned long max, unsigned int depth)
6854{
6855 struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
6856 bool leaf = mte_is_leaf(entry);
6857 unsigned long first = min;
6858 int i;
6859
6860 pr_cont(" contents: ");
6861 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++)
6862 pr_cont("%lu ", node->gap[i]);
6863 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
6864 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++)
6865 pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6866 pr_cont("%p\n", node->slot[i]);
6867 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6868 unsigned long last = max;
6869
6870 if (i < (MAPLE_ARANGE64_SLOTS - 1))
6871 last = node->pivot[i];
6872 else if (!node->slot[i])
6873 break;
6874 if (last == 0 && i > 0)
6875 break;
6876 if (leaf)
6877 mt_dump_entry(mt_slot(mt, node->slot, i),
6878 first, last, depth + 1);
6879 else if (node->slot[i])
6880 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6881 first, last, depth + 1);
6882
6883 if (last == max)
6884 break;
6885 if (last > max) {
6886 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6887 node, last, max, i);
6888 break;
6889 }
6890 first = last + 1;
6891 }
6892}
6893
6894static void mt_dump_node(const struct maple_tree *mt, void *entry,
6895 unsigned long min, unsigned long max, unsigned int depth)
6896{
6897 struct maple_node *node = mte_to_node(entry);
6898 unsigned int type = mte_node_type(entry);
6899 unsigned int i;
6900
6901 mt_dump_range(min, max, depth);
6902
6903 pr_cont("node %p depth %d type %d parent %p", node, depth, type,
6904 node ? node->parent : NULL);
6905 switch (type) {
6906 case maple_dense:
6907 pr_cont("\n");
6908 for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
6909 if (min + i > max)
6910 pr_cont("OUT OF RANGE: ");
6911 mt_dump_entry(mt_slot(mt, node->slot, i),
6912 min + i, min + i, depth);
6913 }
6914 break;
6915 case maple_leaf_64:
6916 case maple_range_64:
6917 mt_dump_range64(mt, entry, min, max, depth);
6918 break;
6919 case maple_arange_64:
6920 mt_dump_arange64(mt, entry, min, max, depth);
6921 break;
6922
6923 default:
6924 pr_cont(" UNKNOWN TYPE\n");
6925 }
6926}
6927
6928void mt_dump(const struct maple_tree *mt)
6929{
6930 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
6931
6932 pr_info("maple_tree(%p) flags %X, height %u root %p\n",
6933 mt, mt->ma_flags, mt_height(mt), entry);
6934 if (!xa_is_node(entry))
6935 mt_dump_entry(entry, 0, 0, 0);
6936 else if (entry)
6937 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0);
6938}
6939EXPORT_SYMBOL_GPL(mt_dump);
6940
6941/*
6942 * Calculate the maximum gap in a node and check if that's what is reported in
6943 * the parent (unless root).
6944 */
6945static void mas_validate_gaps(struct ma_state *mas)
6946{
6947 struct maple_enode *mte = mas->node;
6948 struct maple_node *p_mn;
6949 unsigned long gap = 0, max_gap = 0;
6950 unsigned long p_end, p_start = mas->min;
6951 unsigned char p_slot;
6952 unsigned long *gaps = NULL;
6953 unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte));
6954 int i;
6955
6956 if (ma_is_dense(mte_node_type(mte))) {
6957 for (i = 0; i < mt_slot_count(mte); i++) {
6958 if (mas_get_slot(mas, i)) {
6959 if (gap > max_gap)
6960 max_gap = gap;
6961 gap = 0;
6962 continue;
6963 }
6964 gap++;
6965 }
6966 goto counted;
6967 }
6968
6969 gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte));
6970 for (i = 0; i < mt_slot_count(mte); i++) {
6971 p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte));
6972
6973 if (!gaps) {
6974 if (mas_get_slot(mas, i)) {
6975 gap = 0;
6976 goto not_empty;
6977 }
6978
6979 gap += p_end - p_start + 1;
6980 } else {
6981 void *entry = mas_get_slot(mas, i);
6982
6983 gap = gaps[i];
6984 if (!entry) {
6985 if (gap != p_end - p_start + 1) {
6986 pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n",
6987 mas_mn(mas), i,
6988 mas_get_slot(mas, i), gap,
6989 p_end, p_start);
6990 mt_dump(mas->tree);
6991
6992 MT_BUG_ON(mas->tree,
6993 gap != p_end - p_start + 1);
6994 }
6995 } else {
6996 if (gap > p_end - p_start + 1) {
6997 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n",
6998 mas_mn(mas), i, gap, p_end, p_start,
6999 p_end - p_start + 1);
7000 MT_BUG_ON(mas->tree,
7001 gap > p_end - p_start + 1);
7002 }
7003 }
7004 }
7005
7006 if (gap > max_gap)
7007 max_gap = gap;
7008not_empty:
7009 p_start = p_end + 1;
7010 if (p_end >= mas->max)
7011 break;
7012 }
7013
7014counted:
7015 if (mte_is_root(mte))
7016 return;
7017
7018 p_slot = mte_parent_slot(mas->node);
7019 p_mn = mte_parent(mte);
7020 MT_BUG_ON(mas->tree, max_gap > mas->max);
7021 if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) {
7022 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
7023 mt_dump(mas->tree);
7024 }
7025
7026 MT_BUG_ON(mas->tree,
7027 ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap);
7028}
7029
7030static void mas_validate_parent_slot(struct ma_state *mas)
7031{
7032 struct maple_node *parent;
7033 struct maple_enode *node;
7034 enum maple_type p_type = mas_parent_enum(mas, mas->node);
7035 unsigned char p_slot = mte_parent_slot(mas->node);
7036 void __rcu **slots;
7037 int i;
7038
7039 if (mte_is_root(mas->node))
7040 return;
7041
7042 parent = mte_parent(mas->node);
7043 slots = ma_slots(parent, p_type);
7044 MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
7045
7046 /* Check prev/next parent slot for duplicate node entry */
7047
7048 for (i = 0; i < mt_slots[p_type]; i++) {
7049 node = mas_slot(mas, slots, i);
7050 if (i == p_slot) {
7051 if (node != mas->node)
7052 pr_err("parent %p[%u] does not have %p\n",
7053 parent, i, mas_mn(mas));
7054 MT_BUG_ON(mas->tree, node != mas->node);
7055 } else if (node == mas->node) {
7056 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
7057 mas_mn(mas), parent, i, p_slot);
7058 MT_BUG_ON(mas->tree, node == mas->node);
7059 }
7060 }
7061}
7062
7063static void mas_validate_child_slot(struct ma_state *mas)
7064{
7065 enum maple_type type = mte_node_type(mas->node);
7066 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7067 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
7068 struct maple_enode *child;
7069 unsigned char i;
7070
7071 if (mte_is_leaf(mas->node))
7072 return;
7073
7074 for (i = 0; i < mt_slots[type]; i++) {
7075 child = mas_slot(mas, slots, i);
7076 if (!pivots[i] || pivots[i] == mas->max)
7077 break;
7078
7079 if (!child)
7080 break;
7081
7082 if (mte_parent_slot(child) != i) {
7083 pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
7084 mas_mn(mas), i, mte_to_node(child),
7085 mte_parent_slot(child));
7086 MT_BUG_ON(mas->tree, 1);
7087 }
7088
7089 if (mte_parent(child) != mte_to_node(mas->node)) {
7090 pr_err("child %p has parent %p not %p\n",
7091 mte_to_node(child), mte_parent(child),
7092 mte_to_node(mas->node));
7093 MT_BUG_ON(mas->tree, 1);
7094 }
7095 }
7096}
7097
7098/*
7099 * Validate all pivots are within mas->min and mas->max.
7100 */
7101static void mas_validate_limits(struct ma_state *mas)
7102{
7103 int i;
7104 unsigned long prev_piv = 0;
7105 enum maple_type type = mte_node_type(mas->node);
7106 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7107 unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7108
7109 /* all limits are fine here. */
7110 if (mte_is_root(mas->node))
7111 return;
7112
7113 for (i = 0; i < mt_slots[type]; i++) {
7114 unsigned long piv;
7115
7116 piv = mas_safe_pivot(mas, pivots, i, type);
7117
7118 if (!piv && (i != 0))
7119 break;
7120
7121 if (!mte_is_leaf(mas->node)) {
7122 void *entry = mas_slot(mas, slots, i);
7123
7124 if (!entry)
7125 pr_err("%p[%u] cannot be null\n",
7126 mas_mn(mas), i);
7127
7128 MT_BUG_ON(mas->tree, !entry);
7129 }
7130
7131 if (prev_piv > piv) {
7132 pr_err("%p[%u] piv %lu < prev_piv %lu\n",
7133 mas_mn(mas), i, piv, prev_piv);
7134 MT_BUG_ON(mas->tree, piv < prev_piv);
7135 }
7136
7137 if (piv < mas->min) {
7138 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
7139 piv, mas->min);
7140 MT_BUG_ON(mas->tree, piv < mas->min);
7141 }
7142 if (piv > mas->max) {
7143 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
7144 piv, mas->max);
7145 MT_BUG_ON(mas->tree, piv > mas->max);
7146 }
7147 prev_piv = piv;
7148 if (piv == mas->max)
7149 break;
7150 }
7151 for (i += 1; i < mt_slots[type]; i++) {
7152 void *entry = mas_slot(mas, slots, i);
7153
7154 if (entry && (i != mt_slots[type] - 1)) {
7155 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
7156 i, entry);
7157 MT_BUG_ON(mas->tree, entry != NULL);
7158 }
7159
7160 if (i < mt_pivots[type]) {
7161 unsigned long piv = pivots[i];
7162
7163 if (!piv)
7164 continue;
7165
7166 pr_err("%p[%u] should not have piv %lu\n",
7167 mas_mn(mas), i, piv);
7168 MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1);
7169 }
7170 }
7171}
7172
7173static void mt_validate_nulls(struct maple_tree *mt)
7174{
7175 void *entry, *last = (void *)1;
7176 unsigned char offset = 0;
7177 void __rcu **slots;
7178 MA_STATE(mas, mt, 0, 0);
7179
7180 mas_start(&mas);
7181 if (mas_is_none(&mas) || (mas.node == MAS_ROOT))
7182 return;
7183
7184 while (!mte_is_leaf(mas.node))
7185 mas_descend(&mas);
7186
7187 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7188 do {
7189 entry = mas_slot(&mas, slots, offset);
7190 if (!last && !entry) {
7191 pr_err("Sequential nulls end at %p[%u]\n",
7192 mas_mn(&mas), offset);
7193 }
7194 MT_BUG_ON(mt, !last && !entry);
7195 last = entry;
7196 if (offset == mas_data_end(&mas)) {
7197 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7198 if (mas_is_none(&mas))
7199 return;
7200 offset = 0;
7201 slots = ma_slots(mte_to_node(mas.node),
7202 mte_node_type(mas.node));
7203 } else {
7204 offset++;
7205 }
7206
7207 } while (!mas_is_none(&mas));
7208}
7209
7210/*
7211 * validate a maple tree by checking:
7212 * 1. The limits (pivots are within mas->min to mas->max)
7213 * 2. The gap is correctly set in the parents
7214 */
7215void mt_validate(struct maple_tree *mt)
7216{
7217 unsigned char end;
7218
7219 MA_STATE(mas, mt, 0, 0);
7220 rcu_read_lock();
7221 mas_start(&mas);
7222 if (!mas_searchable(&mas))
7223 goto done;
7224
7225 mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node));
7226 while (!mas_is_none(&mas)) {
7227 MT_BUG_ON(mas.tree, mte_dead_node(mas.node));
7228 if (!mte_is_root(mas.node)) {
7229 end = mas_data_end(&mas);
7230 if ((end < mt_min_slot_count(mas.node)) &&
7231 (mas.max != ULONG_MAX)) {
7232 pr_err("Invalid size %u of %p\n", end,
7233 mas_mn(&mas));
7234 MT_BUG_ON(mas.tree, 1);
7235 }
7236
7237 }
7238 mas_validate_parent_slot(&mas);
7239 mas_validate_child_slot(&mas);
7240 mas_validate_limits(&mas);
7241 if (mt_is_alloc(mt))
7242 mas_validate_gaps(&mas);
7243 mas_dfs_postorder(&mas, ULONG_MAX);
7244 }
7245 mt_validate_nulls(mt);
7246done:
7247 rcu_read_unlock();
7248
7249}
7250EXPORT_SYMBOL_GPL(mt_validate);
7251
7252#endif /* CONFIG_DEBUG_MAPLE_TREE */