// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Oracle Corporation
 *
 * Module Author: Mike Christie
 */
#include "dm-path-selector.h"

#include <linux/device-mapper.h>
#include <linux/module.h>

#define DM_MSG_PREFIX "multipath io-affinity"
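
/*
 * Overview: every path is configured with a cpumask, and I/O is sent down
 * the path whose mask contains the CPU the I/O is submitted on. When the
 * submitting CPU has no mapping, the selector falls back to a path mapped
 * to another CPU on the same NUMA node, then to any usable mapped path,
 * and the miss is counted in map_misses (reported via the selector status
 * line).
 */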

struct path_info {
	struct dm_path *path;
	cpumask_var_t cpumask;	/* CPUs that should use this path */
	refcount_t refcount;	/* one ref per path_map slot pointing here */
	bool failed;		/* set by fail_path, cleared by reinstate_path */
};

struct selector {
	struct path_info **path_map;	/* per-CPU lookup table, indexed by CPU id */
	cpumask_var_t path_mask;	/* CPUs that have a path_map entry */
	atomic_t map_misses;		/* selections issued on CPUs with no mapping */
};

static void ioa_free_path(struct selector *s, unsigned int cpu)
{
	struct path_info *pi = s->path_map[cpu];

	if (!pi)
		return;

	if (refcount_dec_and_test(&pi->refcount)) {
		cpumask_clear_cpu(cpu, s->path_mask);
		free_cpumask_var(pi->cpumask);
		kfree(pi);

		s->path_map[cpu] = NULL;
	}
}

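/*
 * Each path takes exactly one path-selector argument: a cpumask in the
 * comma-separated hex format accepted by cpumask_parse(). A hypothetical
 * dm-multipath table line (illustrative only; device numbers and sizes are
 * made up) with two paths, steering CPUs 0-1 to the first path and CPUs
 * 2-3 to the second, might look like:
 *
 *   0 1024 multipath 0 0 1 1 io-affinity 0 2 1 8:16 3 8:32 c
 */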
static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
			int argc, char **argv, char **error)
{
	struct selector *s = ps->context;
	struct path_info *pi = NULL;
	unsigned int cpu;
	int ret;

	if (argc != 1) {
		*error = "io-affinity ps: invalid number of arguments";
		return -EINVAL;
	}

	pi = kzalloc(sizeof(*pi), GFP_KERNEL);
	if (!pi) {
		*error = "io-affinity ps: Error allocating path context";
		return -ENOMEM;
	}

	pi->path = path;
	path->pscontext = pi;
	refcount_set(&pi->refcount, 1);

	if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
		*error = "io-affinity ps: Error allocating cpumask context";
		ret = -ENOMEM;
		goto free_pi;
	}

	ret = cpumask_parse(argv[0], pi->cpumask);
	if (ret) {
		*error = "io-affinity ps: invalid cpumask";
		ret = -EINVAL;
		goto free_mask;
	}

	for_each_cpu(cpu, pi->cpumask) {
		if (cpu >= nr_cpu_ids) {
			DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
				     cpu, nr_cpu_ids);
			break;
		}

		if (s->path_map[cpu]) {
			DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
			continue;
		}

		cpumask_set_cpu(cpu, s->path_mask);
		s->path_map[cpu] = pi;
		refcount_inc(&pi->refcount);
	}

	if (refcount_dec_and_test(&pi->refcount)) {
		*error = "io-affinity ps: No new/valid CPU mapping found";
		ret = -EINVAL;
		goto free_mask;
	}

	return 0;

free_mask:
	free_cpumask_var(pi->cpumask);
free_pi:
	kfree(pi);
	return ret;
}

static int ioa_create(struct path_selector *ps, unsigned int argc, char **argv)
{
	struct selector *s;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
			      GFP_KERNEL);
	if (!s->path_map)
		goto free_selector;

	if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
		goto free_map;

	atomic_set(&s->map_misses, 0);
	ps->context = s;
	return 0;

free_map:
	kfree(s->path_map);
free_selector:
	kfree(s);
	return -ENOMEM;
}

static void ioa_destroy(struct path_selector *ps)
{
	struct selector *s = ps->context;
	unsigned int cpu;

	for_each_cpu(cpu, s->path_mask)
		ioa_free_path(s, cpu);

	free_cpumask_var(s->path_mask);
	kfree(s->path_map);
	kfree(s);

	ps->context = NULL;
}

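/*
 * Status reporting: the "info" output emits the selector-wide map_misses
 * counter for each path, while the "table" output emits the cpumask that
 * was configured for the path, printed in hex.
 */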
static int ioa_status(struct path_selector *ps, struct dm_path *path,
		      status_type_t type, char *result, unsigned int maxlen)
{
	struct selector *s = ps->context;
	struct path_info *pi;
	int sz = 0;

	if (!path) {
		DMEMIT("0 ");
		return sz;
	}

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", atomic_read(&s->map_misses));
		break;
	case STATUSTYPE_TABLE:
		pi = path->pscontext;
		DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
		break;
	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}

	return sz;
}

static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
{
	struct path_info *pi = p->pscontext;

	pi->failed = true;
}

static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
{
	struct path_info *pi = p->pscontext;

	pi->failed = false;
	return 0;
}

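/*
 * Selection order: first the path mapped to the submitting CPU, then a
 * path mapped to any CPU on the same NUMA node, then any usable mapped
 * path. The NUMA and global fallbacks skip failed paths; a missing
 * per-CPU mapping increments map_misses.
 */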
static struct dm_path *ioa_select_path(struct path_selector *ps,
				       size_t nr_bytes)
{
	unsigned int cpu, node;
	struct selector *s = ps->context;
	const struct cpumask *cpumask;
	struct path_info *pi;
	int i;

	cpu = get_cpu();

	pi = s->path_map[cpu];
	if (pi && !pi->failed)
		goto done;

	/*
	 * Perf is not optimal, but we at least try the local node then just
	 * try not to fail.
	 */
	if (!pi)
		atomic_inc(&s->map_misses);

	node = cpu_to_node(cpu);
	cpumask = cpumask_of_node(node);
	for_each_cpu(i, cpumask) {
		pi = s->path_map[i];
		if (pi && !pi->failed)
			goto done;
	}

	for_each_cpu(i, s->path_mask) {
		pi = s->path_map[i];
		if (pi && !pi->failed)
			goto done;
	}
	pi = NULL;

done:
	put_cpu();
	return pi ? pi->path : NULL;
}

static struct path_selector_type ioa_ps = {
	.name		= "io-affinity",
	.module		= THIS_MODULE,
	.table_args	= 1,
	.info_args	= 1,
	.create		= ioa_create,
	.destroy	= ioa_destroy,
	.status		= ioa_status,
	.add_path	= ioa_add_path,
	.fail_path	= ioa_fail_path,
	.reinstate_path	= ioa_reinstate_path,
	.select_path	= ioa_select_path,
};

static int __init dm_ioa_init(void)
{
	int ret = dm_register_path_selector(&ioa_ps);

	if (ret < 0)
		DMERR("register failed %d", ret);
	return ret;
}

static void __exit dm_ioa_exit(void)
{
	int ret = dm_unregister_path_selector(&ioa_ps);

	if (ret < 0)
		DMERR("unregister failed %d", ret);
}

module_init(dm_ioa_init);
module_exit(dm_ioa_exit);

MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on");
MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
MODULE_LICENSE("GPL");