Merge tag 'ipsec-2023-10-17' of git://git.kernel.org/pub/scm/linux/kernel/git/klasser...
[linux-block.git] / tools / workqueue / wq_monitor.py
CommitLineData
725e8ec5
TH
1#!/usr/bin/env drgn
2#
3# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5
6desc = """
7This is a drgn script to monitor workqueues. For more info on drgn, visit
8https://github.com/osandov/drgn.
9
10 total Total number of work items executed by the workqueue.
11
12 infl The number of currently in-flight work items.
13
8a1dd1e5
TH
14 CPUtime Total CPU time consumed by the workqueue in seconds. This is
15 sampled from scheduler ticks and only provides ballpark
16 measurement. "nohz_full=" CPUs are excluded from measurement.
17
616db877
TH
18 CPUitsv The number of times a concurrency-managed work item hogged CPU
19 longer than the threshold (workqueue.cpu_intensive_thresh_us)
20 and got excluded from concurrency management to avoid stalling
21 other work items.
22
8639eceb
TH
23 CMW/RPR For per-cpu workqueues, the number of concurrency-management
24 wake-ups while executing a work item of the workqueue. For
25 unbound workqueues, the number of times a worker was repatriated
26 to its affinity scope after being migrated to an off-scope CPU by
27 the scheduler.
725e8ec5
TH
28
29 mayday The number of times the rescuer was requested while waiting for
30 new worker creation.
31
32 rescued The number of work items executed by the rescuer.
33"""
34
35import sys
36import signal
37import os
38import re
39import time
40import json
41
42import drgn
43from drgn.helpers.linux.list import list_for_each_entry,list_empty
44from drgn.helpers.linux.cpumask import for_each_possible_cpu
45
46import argparse
# Command-line interface. RawTextHelpFormatter keeps the line breaks of
# `desc` intact in the --help output.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
# Zero or more regexes selecting workqueues by name.
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)
# Delay between two reports, in seconds.
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)
# Switch from the human-readable table to one record per workqueue.
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
56
def err(s):
    """Report a fatal error and terminate the script.

    Writes `s` followed by a newline to stderr, flushes stderr, and exits
    with status 1. Never returns.
    """
    print(s, file=sys.stderr)
    sys.stderr.flush()
    raise SystemExit(1)
60
# NOTE(review): `prog` is not defined in this file; presumably it is the
# program object injected by the drgn runner named in the shebang - confirm.

# Kernel list head that main() walks to visit every workqueue.
workqueues = prog['workqueues']

# Bits tested against workqueue_struct.flags.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into the per-pool_workqueue stats[] array.
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']              # work items that began executing
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']          # work items that finished executing
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']            # accumulated CPU time
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']  # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']          # concurrency-management worker wakeups
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']      # unbound workers pulled back into scope
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']                # mayday requests sent to the rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']              # linked work items run by the rescuer
# Number of stat slots; used to size and walk the stats list.
PWQ_NR_STATS = prog['PWQ_NR_STATS']
75
class WqStats:
    """Point-in-time counters of one workqueue.

    The counters are the element-wise sum of the stats[] arrays of every
    pool_workqueue linked on the workqueue's `pwqs` list.

    Attributes:
        name: workqueue name decoded to str.
        unbound: True when WQ_UNBOUND is set in the workqueue flags.
        mem_reclaim: True when WQ_MEM_RECLAIM is set in the workqueue flags.
        stats: list of PWQ_NR_STATS ints, indexed by the PWQ_STAT_* constants.
    """

    def __init__(self, wq):
        """Read name, flags and summed counters from workqueue object `wq`."""
        self.name = wq.name.string_().decode()
        # bool() so the flags are plain Python bools regardless of what the
        # comparison on the flag object yields.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the counters as a flat dict stamped with `now`.

        `now` is stored unchanged under 'timestamp'. The counters are
        reported raw; 'cpu_time' is not scaled the way the table view is.
        """
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str().

        Declared static so it can be called on the class as well as on an
        instance.
        """
        return (f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '
                f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}')

    def table_row_str(self):
        """Return this workqueue's counters as one fixed-width table row.

        Columns that do not apply to the workqueue are shown as '-'.
        """
        cpu_intensive = '-'
        cmw_rpr = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriations for unbound
        # workqueues, concurrency-management wakeups otherwise. CPUitsv is
        # only filled in for the latter.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        # Rescuer columns are only filled in for WQ_MEM_RECLAIM workqueues.
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        # started and completed are summed from separate counters, so the
        # difference is clamped at zero.
        in_flight = max(self.stats[PWQ_STAT_STARTED] -
                        self.stats[PWQ_STAT_COMPLETED], 0)

        # Long names keep their last 24 characters. The CPU time counter is
        # divided by 1e6 for display (presumably microseconds -> seconds).
        out = (f'{self.name[-24:]:24} '
               f'{self.stats[PWQ_STAT_STARTED]:8} '
               f'{in_flight:5} '
               f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} '
               f'{cpu_intensive:>7} '
               f'{cmw_rpr:>7} '
               f'{mayday:>7} '
               f'{rescued:>7} ')
        # Drop the trailing separator so rows end where the header ends.
        return out.rstrip()
129
# Set by sigint_handler(); the monitoring loop checks it before each pass.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler that requests a clean stop of the monitoring loop.

    Both arguments are supplied by the signal machinery and are ignored;
    the handler only raises the module-level `exit_req` flag.
    """
    global exit_req
    exit_req = True
135
def main():
    """Print workqueue statistics until interrupted.

    Reads the parsed command line from the module-level `args`:
    `args.workqueue` holds name regexes that are OR-ed together,
    `args.interval` is the delay between passes (0 prints once and
    returns), and `args.json` selects one JSON object per workqueue
    instead of the table. An invalid pattern or a negative interval is
    reported through err(), which exits with status 1.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval
    if interval < 0:
        # time.sleep() rejects negative values; fail before printing anything.
        err(f'Invalid interval {interval}: must be >= 0')

    # Any one of the given patterns may match.
    filter_re = None
    if args.workqueue:
        try:
            filter_re = re.compile('|'.join(args.workqueue))
        except re.error as e:
            err(f'Invalid workqueue pattern: {e}')

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        # One timestamp per pass so all records of a pass share it.
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialise properly; printing the dict itself would emit
                # Python repr (single quotes, True/False), not JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()