Commit | Line | Data |
---|---|---|
b9c87f53 NK |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | |
3 | * Convert sample address to data type using DWARF debug info. | |
4 | * | |
5 | * Written by Namhyung Kim <namhyung@kernel.org> | |
6 | */ | |
7 | ||
8 | #include <stdio.h> | |
9 | #include <stdlib.h> | |
10 | #include <inttypes.h> | |
11 | ||
d3030191 | 12 | #include "annotate.h" |
b9c87f53 NK |
13 | #include "annotate-data.h" |
14 | #include "debuginfo.h" | |
15 | #include "debug.h" | |
16 | #include "dso.h" | |
83bfa06d | 17 | #include "dwarf-regs.h" |
9bd7ddd1 NK |
18 | #include "evsel.h" |
19 | #include "evlist.h" | |
b9c87f53 NK |
20 | #include "map.h" |
21 | #include "map_symbol.h" | |
22 | #include "strbuf.h" | |
23 | #include "symbol.h" | |
263925bf | 24 | #include "symbol_conf.h" |
b9c87f53 | 25 | |
fc044c53 NK |
26 | /* |
27 | * Compare type name and size to maintain them in a tree. | |
28 | * I'm not sure if DWARF would have information of a single type in many | |
29 | * different places (compilation units). If not, it could compare the | |
30 | * offset of the type entry in the .debug_info section. | |
31 | */ | |
32 | static int data_type_cmp(const void *_key, const struct rb_node *node) | |
33 | { | |
34 | const struct annotated_data_type *key = _key; | |
35 | struct annotated_data_type *type; | |
36 | ||
37 | type = rb_entry(node, struct annotated_data_type, node); | |
38 | ||
4a111cad NK |
39 | if (key->self.size != type->self.size) |
40 | return key->self.size - type->self.size; | |
41 | return strcmp(key->self.type_name, type->self.type_name); | |
fc044c53 NK |
42 | } |
43 | ||
44 | static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) | |
45 | { | |
46 | struct annotated_data_type *a, *b; | |
47 | ||
48 | a = rb_entry(node_a, struct annotated_data_type, node); | |
49 | b = rb_entry(node_b, struct annotated_data_type, node); | |
50 | ||
4a111cad NK |
51 | if (a->self.size != b->self.size) |
52 | return a->self.size < b->self.size; | |
53 | return strcmp(a->self.type_name, b->self.type_name) < 0; | |
54 | } | |
55 | ||
56 | /* Recursively add new members for struct/union */ | |
57 | static int __add_member_cb(Dwarf_Die *die, void *arg) | |
58 | { | |
59 | struct annotated_member *parent = arg; | |
60 | struct annotated_member *member; | |
61 | Dwarf_Die member_type, die_mem; | |
62 | Dwarf_Word size, loc; | |
63 | Dwarf_Attribute attr; | |
64 | struct strbuf sb; | |
65 | int tag; | |
66 | ||
67 | if (dwarf_tag(die) != DW_TAG_member) | |
68 | return DIE_FIND_CB_SIBLING; | |
69 | ||
70 | member = zalloc(sizeof(*member)); | |
71 | if (member == NULL) | |
72 | return DIE_FIND_CB_END; | |
73 | ||
74 | strbuf_init(&sb, 32); | |
75 | die_get_typename(die, &sb); | |
76 | ||
77 | die_get_real_type(die, &member_type); | |
78 | if (dwarf_aggregate_size(&member_type, &size) < 0) | |
79 | size = 0; | |
80 | ||
81 | if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) | |
82 | loc = 0; | |
83 | else | |
84 | dwarf_formudata(&attr, &loc); | |
85 | ||
86 | member->type_name = strbuf_detach(&sb, NULL); | |
87 | /* member->var_name can be NULL */ | |
88 | if (dwarf_diename(die)) | |
89 | member->var_name = strdup(dwarf_diename(die)); | |
90 | member->size = size; | |
91 | member->offset = loc + parent->offset; | |
92 | INIT_LIST_HEAD(&member->children); | |
93 | list_add_tail(&member->node, &parent->children); | |
94 | ||
95 | tag = dwarf_tag(&member_type); | |
96 | switch (tag) { | |
97 | case DW_TAG_structure_type: | |
98 | case DW_TAG_union_type: | |
99 | die_find_child(&member_type, __add_member_cb, member, &die_mem); | |
100 | break; | |
101 | default: | |
102 | break; | |
103 | } | |
104 | return DIE_FIND_CB_SIBLING; | |
105 | } | |
106 | ||
107 | static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) | |
108 | { | |
109 | Dwarf_Die die_mem; | |
110 | ||
111 | die_find_child(type, __add_member_cb, &parent->self, &die_mem); | |
112 | } | |
113 | ||
114 | static void delete_members(struct annotated_member *member) | |
115 | { | |
116 | struct annotated_member *child, *tmp; | |
117 | ||
118 | list_for_each_entry_safe(child, tmp, &member->children, node) { | |
119 | list_del(&child->node); | |
120 | delete_members(child); | |
121 | free(child->type_name); | |
122 | free(child->var_name); | |
123 | free(child); | |
124 | } | |
fc044c53 NK |
125 | } |
126 | ||
127 | static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, | |
128 | Dwarf_Die *type_die) | |
129 | { | |
130 | struct annotated_data_type *result = NULL; | |
131 | struct annotated_data_type key; | |
132 | struct rb_node *node; | |
133 | struct strbuf sb; | |
134 | char *type_name; | |
135 | Dwarf_Word size; | |
136 | ||
137 | strbuf_init(&sb, 32); | |
138 | if (die_get_typename_from_type(type_die, &sb) < 0) | |
139 | strbuf_add(&sb, "(unknown type)", 14); | |
140 | type_name = strbuf_detach(&sb, NULL); | |
141 | dwarf_aggregate_size(type_die, &size); | |
142 | ||
143 | /* Check existing nodes in dso->data_types tree */ | |
4a111cad NK |
144 | key.self.type_name = type_name; |
145 | key.self.size = size; | |
fc044c53 NK |
146 | node = rb_find(&key, &dso->data_types, data_type_cmp); |
147 | if (node) { | |
148 | result = rb_entry(node, struct annotated_data_type, node); | |
149 | free(type_name); | |
150 | return result; | |
151 | } | |
152 | ||
153 | /* If not, add a new one */ | |
154 | result = zalloc(sizeof(*result)); | |
155 | if (result == NULL) { | |
156 | free(type_name); | |
157 | return NULL; | |
158 | } | |
159 | ||
4a111cad NK |
160 | result->self.type_name = type_name; |
161 | result->self.size = size; | |
162 | INIT_LIST_HEAD(&result->self.children); | |
163 | ||
263925bf NK |
164 | if (symbol_conf.annotate_data_member) |
165 | add_member_types(result, type_die); | |
fc044c53 NK |
166 | |
167 | rb_add(&result->node, &dso->data_types, data_type_less); | |
168 | return result; | |
169 | } | |
170 | ||
b9c87f53 NK |
171 | static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) |
172 | { | |
173 | Dwarf_Off off, next_off; | |
174 | size_t header_size; | |
175 | ||
176 | if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) | |
177 | return cu_die; | |
178 | ||
179 | /* | |
180 | * There are some kernels don't have full aranges and contain only a few | |
181 | * aranges entries. Fallback to iterate all CU entries in .debug_info | |
182 | * in case it's missing. | |
183 | */ | |
184 | off = 0; | |
185 | while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, | |
186 | NULL, NULL, NULL) == 0) { | |
187 | if (dwarf_offdie(di->dbg, off + header_size, cu_die) && | |
188 | dwarf_haspc(cu_die, pc)) | |
189 | return true; | |
190 | ||
191 | off = next_off; | |
192 | } | |
193 | return false; | |
194 | } | |
195 | ||
196 | /* The type info will be saved in @type_die */ | |
83bfa06d NK |
197 | static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, |
198 | bool is_pointer) | |
b9c87f53 NK |
199 | { |
200 | Dwarf_Word size; | |
201 | ||
202 | /* Get the type of the variable */ | |
203 | if (die_get_real_type(var_die, type_die) == NULL) { | |
204 | pr_debug("variable has no type\n"); | |
61a9741e | 205 | ann_data_stat.no_typeinfo++; |
b9c87f53 NK |
206 | return -1; |
207 | } | |
208 | ||
209 | /* | |
83bfa06d NK |
210 | * Usually it expects a pointer type for a memory access. |
211 | * Convert to a real type it points to. But global variables | |
bc10db8e | 212 | * and local variables are accessed directly without a pointer. |
b9c87f53 | 213 | */ |
83bfa06d NK |
214 | if (is_pointer) { |
215 | if ((dwarf_tag(type_die) != DW_TAG_pointer_type && | |
216 | dwarf_tag(type_die) != DW_TAG_array_type) || | |
217 | die_get_real_type(type_die, type_die) == NULL) { | |
218 | pr_debug("no pointer or no type\n"); | |
219 | ann_data_stat.no_typeinfo++; | |
220 | return -1; | |
221 | } | |
b9c87f53 NK |
222 | } |
223 | ||
224 | /* Get the size of the actual type */ | |
225 | if (dwarf_aggregate_size(type_die, &size) < 0) { | |
226 | pr_debug("type size is unknown\n"); | |
61a9741e | 227 | ann_data_stat.invalid_size++; |
b9c87f53 NK |
228 | return -1; |
229 | } | |
230 | ||
231 | /* Minimal sanity check */ | |
232 | if ((unsigned)offset >= size) { | |
233 | pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); | |
61a9741e | 234 | ann_data_stat.bad_offset++; |
b9c87f53 NK |
235 | return -1; |
236 | } | |
237 | ||
238 | return 0; | |
239 | } | |
240 | ||
241 | /* The result will be saved in @type_die */ | |
83bfa06d | 242 | static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, |
5f7cdde8 NK |
243 | const char *var_name, struct annotated_op_loc *loc, |
244 | Dwarf_Die *type_die) | |
b9c87f53 NK |
245 | { |
246 | Dwarf_Die cu_die, var_die; | |
247 | Dwarf_Die *scopes = NULL; | |
d3030191 | 248 | int reg, offset; |
b9c87f53 NK |
249 | int ret = -1; |
250 | int i, nr_scopes; | |
bc10db8e NK |
251 | int fbreg = -1; |
252 | bool is_fbreg = false; | |
253 | int fb_offset = 0; | |
b9c87f53 NK |
254 | |
255 | /* Get a compile_unit for this address */ | |
256 | if (!find_cu_die(di, pc, &cu_die)) { | |
257 | pr_debug("cannot find CU for address %" PRIx64 "\n", pc); | |
61a9741e | 258 | ann_data_stat.no_cuinfo++; |
b9c87f53 NK |
259 | return -1; |
260 | } | |
261 | ||
d3030191 NK |
262 | reg = loc->reg1; |
263 | offset = loc->offset; | |
264 | ||
5f7cdde8 NK |
265 | if (reg == DWARF_REG_PC) { |
266 | if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { | |
267 | ret = check_variable(&var_die, type_die, offset, | |
268 | /*is_pointer=*/false); | |
269 | loc->offset = offset; | |
270 | goto out; | |
271 | } | |
272 | ||
273 | if (var_name && die_find_variable_at(&cu_die, var_name, pc, | |
274 | &var_die)) { | |
275 | ret = check_variable(&var_die, type_die, 0, | |
276 | /*is_pointer=*/false); | |
277 | /* loc->offset will be updated by the caller */ | |
278 | goto out; | |
279 | } | |
83bfa06d NK |
280 | } |
281 | ||
282 | /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ | |
283 | nr_scopes = die_get_scopes(&cu_die, pc, &scopes); | |
284 | ||
bc10db8e NK |
285 | if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) { |
286 | Dwarf_Attribute attr; | |
287 | Dwarf_Block block; | |
288 | ||
289 | /* Check if the 'reg' is assigned as frame base register */ | |
290 | if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL && | |
291 | dwarf_formblock(&attr, &block) == 0 && block.length == 1) { | |
292 | switch (*block.data) { | |
293 | case DW_OP_reg0 ... DW_OP_reg31: | |
294 | fbreg = *block.data - DW_OP_reg0; | |
295 | break; | |
296 | case DW_OP_call_frame_cfa: | |
297 | if (die_get_cfa(di->dbg, pc, &fbreg, | |
298 | &fb_offset) < 0) | |
299 | fbreg = -1; | |
300 | break; | |
301 | default: | |
302 | break; | |
303 | } | |
304 | } | |
305 | } | |
306 | ||
d3030191 | 307 | retry: |
bc10db8e NK |
308 | is_fbreg = (reg == fbreg); |
309 | if (is_fbreg) | |
310 | offset = loc->offset - fb_offset; | |
311 | ||
b9c87f53 NK |
312 | /* Search from the inner-most scope to the outer */ |
313 | for (i = nr_scopes - 1; i >= 0; i--) { | |
83bfa06d NK |
314 | if (reg == DWARF_REG_PC) { |
315 | if (!die_find_variable_by_addr(&scopes[i], pc, addr, | |
316 | &var_die, &offset)) | |
317 | continue; | |
318 | } else { | |
319 | /* Look up variables/parameters in this scope */ | |
320 | if (!die_find_variable_by_reg(&scopes[i], pc, reg, | |
bc10db8e | 321 | &offset, is_fbreg, &var_die)) |
83bfa06d NK |
322 | continue; |
323 | } | |
b9c87f53 NK |
324 | |
325 | /* Found a variable, see if it's correct */ | |
83bfa06d | 326 | ret = check_variable(&var_die, type_die, offset, |
bc10db8e | 327 | reg != DWARF_REG_PC && !is_fbreg); |
5f7cdde8 | 328 | loc->offset = offset; |
61a9741e | 329 | goto out; |
b9c87f53 | 330 | } |
d3030191 NK |
331 | |
332 | if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { | |
333 | reg = loc->reg2; | |
334 | goto retry; | |
335 | } | |
336 | ||
61a9741e NK |
337 | if (ret < 0) |
338 | ann_data_stat.no_var++; | |
b9c87f53 | 339 | |
61a9741e | 340 | out: |
b9c87f53 NK |
341 | free(scopes); |
342 | return ret; | |
343 | } | |
344 | ||
345 | /** | |
346 | * find_data_type - Return a data type at the location | |
347 | * @ms: map and symbol at the location | |
348 | * @ip: instruction address of the memory access | |
d3030191 | 349 | * @loc: instruction operand location |
5f7cdde8 NK |
350 | * @addr: data address of the memory access |
351 | * @var_name: global variable name | |
b9c87f53 NK |
352 | * |
353 | * This functions searches the debug information of the binary to get the data | |
5f7cdde8 NK |
354 | * type it accesses. The exact location is expressed by (@ip, reg, offset) |
355 | * for pointer variables or (@ip, @addr) for global variables. Note that global | |
356 | * variables might update the @loc->offset after finding the start of the variable. | |
357 | * If it cannot find a global variable by address, it tried to fine a declaration | |
358 | * of the variable using @var_name. In that case, @loc->offset won't be updated. | |
359 | * | |
b9c87f53 NK |
360 | * It return %NULL if not found. |
361 | */ | |
362 | struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, | |
5f7cdde8 NK |
363 | struct annotated_op_loc *loc, u64 addr, |
364 | const char *var_name) | |
b9c87f53 NK |
365 | { |
366 | struct annotated_data_type *result = NULL; | |
367 | struct dso *dso = map__dso(ms->map); | |
368 | struct debuginfo *di; | |
369 | Dwarf_Die type_die; | |
b9c87f53 NK |
370 | u64 pc; |
371 | ||
372 | di = debuginfo__new(dso->long_name); | |
373 | if (di == NULL) { | |
374 | pr_debug("cannot get the debug info\n"); | |
375 | return NULL; | |
376 | } | |
377 | ||
378 | /* | |
379 | * IP is a relative instruction address from the start of the map, as | |
380 | * it can be randomized/relocated, it needs to translate to PC which is | |
381 | * a file address for DWARF processing. | |
382 | */ | |
383 | pc = map__rip_2objdump(ms->map, ip); | |
5f7cdde8 | 384 | if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) |
b9c87f53 NK |
385 | goto out; |
386 | ||
fc044c53 | 387 | result = dso__findnew_data_type(dso, &type_die); |
b9c87f53 NK |
388 | |
389 | out: | |
390 | debuginfo__delete(di); | |
391 | return result; | |
392 | } | |
fc044c53 | 393 | |
9bd7ddd1 NK |
394 | static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) |
395 | { | |
396 | int i; | |
397 | size_t sz = sizeof(struct type_hist); | |
398 | ||
399 | sz += sizeof(struct type_hist_entry) * adt->self.size; | |
400 | ||
401 | /* Allocate a table of pointers for each event */ | |
402 | adt->nr_histograms = nr_entries; | |
403 | adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); | |
404 | if (adt->histograms == NULL) | |
405 | return -ENOMEM; | |
406 | ||
407 | /* | |
408 | * Each histogram is allocated for the whole size of the type. | |
409 | * TODO: Probably we can move the histogram to members. | |
410 | */ | |
411 | for (i = 0; i < nr_entries; i++) { | |
412 | adt->histograms[i] = zalloc(sz); | |
413 | if (adt->histograms[i] == NULL) | |
414 | goto err; | |
415 | } | |
416 | return 0; | |
417 | ||
418 | err: | |
419 | while (--i >= 0) | |
420 | free(adt->histograms[i]); | |
421 | free(adt->histograms); | |
422 | return -ENOMEM; | |
423 | } | |
424 | ||
425 | static void delete_data_type_histograms(struct annotated_data_type *adt) | |
426 | { | |
427 | for (int i = 0; i < adt->nr_histograms; i++) | |
428 | free(adt->histograms[i]); | |
429 | free(adt->histograms); | |
430 | } | |
431 | ||
fc044c53 NK |
432 | void annotated_data_type__tree_delete(struct rb_root *root) |
433 | { | |
434 | struct annotated_data_type *pos; | |
435 | ||
436 | while (!RB_EMPTY_ROOT(root)) { | |
437 | struct rb_node *node = rb_first(root); | |
438 | ||
439 | rb_erase(node, root); | |
440 | pos = rb_entry(node, struct annotated_data_type, node); | |
4a111cad | 441 | delete_members(&pos->self); |
9bd7ddd1 | 442 | delete_data_type_histograms(pos); |
4a111cad | 443 | free(pos->self.type_name); |
fc044c53 NK |
444 | free(pos); |
445 | } | |
446 | } | |
9bd7ddd1 NK |
447 | |
448 | /** | |
449 | * annotated_data_type__update_samples - Update histogram | |
450 | * @adt: Data type to update | |
451 | * @evsel: Event to update | |
452 | * @offset: Offset in the type | |
453 | * @nr_samples: Number of samples at this offset | |
454 | * @period: Event count at this offset | |
455 | * | |
456 | * This function updates type histogram at @ofs for @evsel. Samples are | |
457 | * aggregated before calling this function so it can be called with more | |
458 | * than one samples at a certain offset. | |
459 | */ | |
460 | int annotated_data_type__update_samples(struct annotated_data_type *adt, | |
461 | struct evsel *evsel, int offset, | |
462 | int nr_samples, u64 period) | |
463 | { | |
464 | struct type_hist *h; | |
465 | ||
466 | if (adt == NULL) | |
467 | return 0; | |
468 | ||
469 | if (adt->histograms == NULL) { | |
470 | int nr = evsel->evlist->core.nr_entries; | |
471 | ||
472 | if (alloc_data_type_histograms(adt, nr) < 0) | |
473 | return -1; | |
474 | } | |
475 | ||
476 | if (offset < 0 || offset >= adt->self.size) | |
477 | return -1; | |
478 | ||
479 | h = adt->histograms[evsel->core.idx]; | |
480 | ||
481 | h->nr_samples += nr_samples; | |
482 | h->addr[offset].nr_samples += nr_samples; | |
483 | h->period += period; | |
484 | h->addr[offset].period += period; | |
485 | return 0; | |
486 | } |