Commit | Line | Data |
---|---|---|
d19720a9 | 1 | Reference-count design for elements of lists/arrays protected by RCU. |
c0dfb290 | 2 | |
d19720a9 PM |
3 | Reference counting on elements of lists which are protected by traditional
4 | reader/writer spinlocks or semaphores is straightforward: | |
c0dfb290 | 5 | |
095975da NP |
6 | 1. 2. |
7 | add() search_and_reference() | |
8 | { { | |
9 | alloc_object read_lock(&list_lock); | |
10 | ... search_for_element | |
11 | atomic_set(&el->rc, 1); atomic_inc(&el->rc); | |
12 | write_lock(&list_lock); ... | |
13 | add_element read_unlock(&list_lock); | |
14 | ... ... | |
15 | write_unlock(&list_lock); } | |
c0dfb290 DS |
16 | } |
17 | ||
18 | 3. 4. | |
19 | release_referenced() delete() | |
20 | { { | |
095975da NP |
21 | ... write_lock(&list_lock); |
22 | if (atomic_dec_and_test(&el->rc)) ... | |
a4d611fd | 23 | kfree(el); remove_element
095975da NP |
24 | } write_unlock(&list_lock); |
25 | ... | |
26 | if (atomic_dec_and_test(&el->rc)) | |
27 | kfree(el); | |
28 | ... | |
c0dfb290 DS |
29 | } |
30 | ||
d19720a9 | 31 | If this list/array is made lock free using RCU as in changing the |
e8aed686 LJ |
32 | write_lock() in add() and delete() to spin_lock() and changing read_lock() |
33 | in search_and_reference() to rcu_read_lock(), the atomic_inc() in | |
34 | search_and_reference() could potentially hold a reference to an element which | |
d19720a9 PM |
35 | has already been deleted from the list/array. Use atomic_inc_not_zero() |
36 | in this scenario as follows: | |
c0dfb290 DS |
37 | |
38 | 1. 2. | |
39 | add() search_and_reference() | |
40 | { { | |
095975da NP |
41 | alloc_object rcu_read_lock(); |
42 | ... search_for_element | |
e8aed686 LJ |
43 | atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) { |
44 | spin_lock(&list_lock); rcu_read_unlock(); | |
095975da NP |
45 | return FAIL; |
46 | add_element } | |
47 | ... ... | |
e8aed686 | 48 | spin_unlock(&list_lock); rcu_read_unlock(); |
c0dfb290 DS |
49 | } } |
50 | 3. 4. | |
51 | release_referenced() delete() | |
52 | { { | |
e8aed686 | 53 | ... spin_lock(&list_lock); |
d19720a9 | 54 | if (atomic_dec_and_test(&el->rc)) ... |
a4d611fd | 55 | call_rcu(&el->head, el_free); remove_element |
e8aed686 | 56 | ... spin_unlock(&list_lock); |
d19720a9 | 57 | } ... |
095975da NP |
58 | if (atomic_dec_and_test(&el->rc)) |
59 | call_rcu(&el->head, el_free); | |
60 | ... | |
c0dfb290 DS |
61 | } |
62 | ||
d19720a9 PM |
63 | Sometimes, a reference to the element needs to be obtained in the |
64 | update (write) stream. In such cases, atomic_inc_not_zero() might be | |
65 | overkill, since we hold the update-side spinlock. One might instead | |
66 | use atomic_inc() in such cases. | |
a4d611fd PM |
67 | |
68 | It is not always convenient to deal with "FAIL" in the | |
69 | search_and_reference() code path. In such cases, the | |
70 | atomic_dec_and_test() may be moved from delete() to el_free() | |
71 | as follows: | |
72 | ||
73 | 1. 2. | |
74 | add() search_and_reference() | |
75 | { { | |
76 | alloc_object rcu_read_lock(); | |
77 | ... search_for_element | |
78 | atomic_set(&el->rc, 1); atomic_inc(&el->rc); | |
79 | spin_lock(&list_lock); ... | |
80 | ||
81 | add_element rcu_read_unlock(); | |
82 | ... } | |
83 | spin_unlock(&list_lock); 4. | |
84 | } delete() | |
85 | 3. { | |
86 | release_referenced() spin_lock(&list_lock); | |
87 | { ... | |
88 | ... remove_element | |
89 | if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock); | |
90 | kfree(el); ... | |
91 | ... call_rcu(&el->head, el_free); | |
92 | } ... | |
93 | 5. } | |
94 | void el_free(struct rcu_head *rhp) | |
95 | { | |
96 | release_referenced(); | |
97 | } | |
98 | ||
99 | The key point is that the initial reference added by add() is not removed | |
100 | until after a grace period has elapsed following removal. This means that | |
101 | search_and_reference() cannot find this element, which means that the value | |
102 | of el->rc cannot increase. Thus, once it reaches zero, there are no | |
103 | readers that can or ever will be able to reference the element. The | |
104 | element can therefore safely be freed. This in turn guarantees that if | |
105 | any reader finds the element, that reader may safely acquire a reference | |
106 | without checking the value of the reference counter. | |
107 | ||
108 | In cases where delete() can sleep, synchronize_rcu() can be called from | |
109 | delete(), so that el_free() can be subsumed into delete() as follows:
110 | ||
111 | 4. | |
112 | delete() | |
113 | { | |
114 | spin_lock(&list_lock); | |
115 | ... | |
116 | remove_element | |
117 | spin_unlock(&list_lock); | |
118 | ... | |
119 | synchronize_rcu(); | |
120 | if (atomic_dec_and_test(&el->rc)) | |
121 | kfree(el); | |
122 | ... | |
123 | } |