// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL

/*
 * How many contexts our trace events might be called in: normal, softirq, irq,
 * and NMI.
 */
#define CONTEXT_COUNT 4

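/*
 * Each CPU gets one buffer of CONTEXT_COUNT * MEMCG_PATH_BUF_SIZE bytes,
 * carved into CONTEXT_COUNT fixed-size slots so that nested contexts (e.g. an
 * irq arriving while the task context is mid-trace) each get their own slot.
 * memcg_path_buf_idx is the byte offset of the next free slot on this CPU;
 * get_memcg_path_buf() advances it and put_memcg_path_buf() rewinds it.
 */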
static DEFINE_PER_CPU(char __rcu *, memcg_path_buf);
static char **tmp_bufs;
static DEFINE_PER_CPU(int, memcg_path_buf_idx);

/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
	int cpu;
	char **old = tmp_bufs;

	for_each_possible_cpu(cpu) {
		*(old++) = rcu_dereference_protected(
			per_cpu(memcg_path_buf, cpu),
			lockdep_is_held(&reg_lock));
		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), NULL);
	}

	/* Wait for inflight memcg_path_buf users to finish. */
	synchronize_rcu();

	old = tmp_bufs;
	for_each_possible_cpu(cpu) {
		kfree(*(old++));
	}

	kfree(tmp_bufs);
	tmp_bufs = NULL;
}

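/*
 * Called when the first mmap_lock trace event is registered (paired with
 * trace_mmap_lock_unreg() below for the last unregistration): on the 0->1
 * transition we allocate the per-CPU memcg path buffers, and on allocation
 * failure we return -ENOMEM so the registration itself fails.
 */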
int trace_mmap_lock_reg(void)
{
	int cpu;
	char *new;

	mutex_lock(&reg_lock);

	/* If the refcount is going 0->1, proceed with allocating buffers. */
	if (reg_refcount++)
		goto out;

	tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
				 GFP_KERNEL);
	if (tmp_bufs == NULL)
		goto out_fail;

	for_each_possible_cpu(cpu) {
		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
		if (new == NULL)
			goto out_fail_free;
		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), new);
		/* Don't need to wait for inflights, they'd have gotten NULL. */
	}

out:
	mutex_unlock(&reg_lock);
	return 0;

out_fail_free:
	free_memcg_path_bufs();
out_fail:
	/* Since we failed, undo the earlier ref increment. */
	--reg_refcount;

	mutex_unlock(&reg_lock);
	return -ENOMEM;
}

void trace_mmap_lock_unreg(void)
{
	mutex_lock(&reg_lock);

	/* If the refcount is going 1->0, proceed with freeing buffers. */
	if (--reg_refcount)
		goto out;

	free_memcg_path_bufs();

out:
	mutex_unlock(&reg_lock);
}

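/*
 * Grab one MEMCG_PATH_BUF_SIZE slot from this CPU's buffer, or return NULL if
 * the buffers are gone (or going away) because the events are being
 * unregistered. The RCU read lock taken here is held until the matching
 * put_memcg_path_buf() call, which keeps free_memcg_path_bufs() from freeing
 * the buffer underneath us; the caller must have preemption disabled so we
 * stay on this CPU.
 */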
static inline char *get_memcg_path_buf(void)
{
	char *buf;
	int idx;

	rcu_read_lock();
	buf = rcu_dereference(*this_cpu_ptr(&memcg_path_buf));
	if (buf == NULL) {
		rcu_read_unlock();
		return NULL;
	}
	idx = this_cpu_add_return(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE) -
	      MEMCG_PATH_BUF_SIZE;
	return &buf[idx];
}

static inline void put_memcg_path_buf(void)
{
	this_cpu_sub(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE);
	rcu_read_unlock();
}

/*
 * Write the given mm_struct's memcg path to a percpu buffer, and return a
 * pointer to it. If the path cannot be determined, or no buffer was available
 * (because the trace event is being unregistered), NULL is returned.
 *
 * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
 * disabled by the caller before calling us, and re-enabled only after the
 * caller is done with the pointer.
 *
 * The caller must call put_memcg_path_buf() once the buffer is no longer
 * needed. This must be done while preemption is still disabled.
 */
static const char *get_mm_memcg_path(struct mm_struct *mm)
{
	char *buf = NULL;
	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

	if (memcg == NULL)
		goto out;
	if (unlikely(memcg->css.cgroup == NULL))
		goto out_put;

	buf = get_memcg_path_buf();
	if (buf == NULL)
		goto out_put;

	cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);

out_put:
	css_put(&memcg->css);
out:
	return buf;
}

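/*
 * Emit the mmap_lock_##type trace event for @mm, annotated with the mm's memcg
 * path. Preemption is disabled across the per-CPU path buffer use, and if no
 * buffer (or no memcg path) is available an empty string is traced instead.
 */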
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                 \
	do {                                                                 \
		const char *memcg_path;                                      \
		preempt_disable();                                           \
		memcg_path = get_mm_memcg_path(mm);                          \
		trace_mmap_lock_##type(mm,                                   \
				       memcg_path != NULL ? memcg_path : "", \
				       ##__VA_ARGS__);                       \
		if (likely(memcg_path != NULL))                              \
			put_memcg_path_buf();                                \
		preempt_enable();                                            \
	} while (0)

#else /* !CONFIG_MEMCG */

int trace_mmap_lock_reg(void)
{
	return 0;
}

void trace_mmap_lock_unreg(void)
{
}

#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
	trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)

#endif /* CONFIG_MEMCG */

/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	TRACE_MMAP_LOCK_EVENT(released, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
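
/*
 * For reference: the callers of the three __mmap_lock_do_trace_*() functions
 * above live in include/linux/mmap_lock.h. Roughly (a paraphrased sketch of
 * that header, not a verbatim copy), each wrapper looks like:
 *
 *	static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
 *							    bool write)
 *	{
 *		if (tracepoint_enabled(mmap_lock_start_locking))
 *			__mmap_lock_do_trace_start_locking(mm, write);
 *	}
 *
 * so the out-of-line functions in this file only run when the corresponding
 * tracepoint is actually enabled.
 */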