Commit | Line | Data |
---|---|---|
3f317499 YG |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* | |
3 | * AMD Address Translation Library | |
4 | * | |
5 | * denormalize.c : Functions to account for interleaving bits | |
6 | * | |
7 | * Copyright (c) 2023, Advanced Micro Devices, Inc. | |
8 | * All Rights Reserved. | |
9 | * | |
10 | * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> | |
11 | */ | |
12 | ||
13 | #include "internal.h" | |
14 | ||
15 | /* | |
16 | * Returns the Destination Fabric ID. This is the first (lowest) | |
17 | * COH_ST Fabric ID used within a DRAM Address map. | |
18 | */ | |
19 | static u16 get_dst_fabric_id(struct addr_ctx *ctx) | |
20 | { | |
21 | switch (df_cfg.rev) { | |
22 | case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit); | |
23 | case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); | |
24 | case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit); | |
25 | case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl); | |
26 | case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl); | |
27 | default: | |
28 | atl_debug_on_bad_df_rev(); | |
29 | return 0; | |
30 | } | |
31 | } | |
32 | ||
33 | /* | |
34 | * Make a contiguous gap in address for N bits starting at bit P. | |
35 | * | |
36 | * Example: | |
37 | * address bits: [20:0] | |
38 | * # of interleave bits (n): 3 | |
39 | * starting interleave bit (p): 8 | |
40 | * | |
41 | * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0] | |
42 | * [23 : 11][10 : 8][7 : 0] | |
43 | */ | |
44 | static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx) | |
45 | { | |
46 | return expand_bits(ctx->map.intlv_bit_pos, | |
47 | ctx->map.total_intlv_bits, | |
48 | ctx->ret_addr); | |
49 | } | |
50 | ||
51 | /* | |
52 | * Make two gaps in address for N bits. | |
53 | * First gap is a single bit at bit P. | |
54 | * Second gap is the remaining N-1 bits at bit 12. | |
55 | * | |
56 | * Example: | |
57 | * address bits: [20:0] | |
58 | * # of interleave bits (n): 3 | |
59 | * starting interleave bit (p): 8 | |
60 | * | |
61 | * First gap | |
62 | * expanded address bits: [20+1 : p+1][p][p-1 : 0] | |
63 | * [21 : 9][8][7 : 0] | |
64 | * | |
65 | * Second gap uses result from first. | |
66 | * r = n - 1; remaining interleave bits | |
67 | * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0] | |
68 | * [23 : 14][13 : 12][11 : 0] | |
69 | */ | |
70 | static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) | |
71 | { | |
72 | /* Make a single space at the interleave bit. */ | |
73 | u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr); | |
74 | ||
75 | /* Done if there's only a single interleave bit. */ | |
76 | if (ctx->map.total_intlv_bits <= 1) | |
77 | return denorm_addr; | |
78 | ||
79 | /* Make spaces for the remaining interleave bits starting at bit 12. */ | |
80 | return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); | |
81 | } | |
82 | ||
453f0ae7 M |
83 | /* |
84 | * Make space for CS ID at bits [14:8] as follows: | |
85 | * | |
86 | * 8 channels -> bits [10:8] | |
87 | * 16 channels -> bits [11:8] | |
88 | * 32 channels -> bits [14,11:8] | |
89 | * | |
90 | * 1 die -> N/A | |
91 | * 2 dies -> bit [12] | |
92 | * 4 dies -> bits [13:12] | |
93 | */ | |
94 | static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) | |
95 | { | |
96 | u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); | |
97 | u64 denorm_addr; | |
98 | ||
99 | if (ctx->map.intlv_bit_pos != 8) { | |
100 | pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); | |
101 | return ~0ULL; | |
102 | } | |
103 | ||
104 | /* Channel bits. Covers up to 4 bits at [11:8]. */ | |
105 | denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); | |
106 | ||
107 | /* Die bits. Always starts at [12]. */ | |
108 | denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); | |
109 | ||
110 | /* Additional channel bit at [14]. */ | |
111 | if (num_intlv_bits > 4) | |
112 | denorm_addr = expand_bits(14, 1, denorm_addr); | |
113 | ||
114 | return denorm_addr; | |
115 | } | |
116 | ||
3f317499 YG |
117 | /* |
118 | * Take the current calculated address and shift enough bits in the middle | |
119 | * to make a gap where the interleave bits will be inserted. | |
120 | */ | |
121 | static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) | |
122 | { | |
123 | switch (ctx->map.intlv_mode) { | |
124 | case NOHASH_2CHAN: | |
125 | case NOHASH_4CHAN: | |
126 | case NOHASH_8CHAN: | |
127 | case NOHASH_16CHAN: | |
128 | case NOHASH_32CHAN: | |
129 | case DF2_2CHAN_HASH: | |
130 | return make_space_for_coh_st_id_at_intlv_bit(ctx); | |
131 | ||
132 | case DF3_COD4_2CHAN_HASH: | |
133 | case DF3_COD2_4CHAN_HASH: | |
134 | case DF3_COD1_8CHAN_HASH: | |
135 | case DF4_NPS4_2CHAN_HASH: | |
136 | case DF4_NPS2_4CHAN_HASH: | |
137 | case DF4_NPS1_8CHAN_HASH: | |
138 | case DF4p5_NPS4_2CHAN_1K_HASH: | |
139 | case DF4p5_NPS4_2CHAN_2K_HASH: | |
140 | case DF4p5_NPS2_4CHAN_2K_HASH: | |
141 | case DF4p5_NPS1_8CHAN_2K_HASH: | |
142 | case DF4p5_NPS1_16CHAN_2K_HASH: | |
143 | return make_space_for_coh_st_id_split_2_1(ctx); | |
453f0ae7 M |
144 | |
145 | case MI3_HASH_8CHAN: | |
146 | case MI3_HASH_16CHAN: | |
147 | case MI3_HASH_32CHAN: | |
148 | return make_space_for_coh_st_id_mi300(ctx); | |
149 | ||
3f317499 YG |
150 | default: |
151 | atl_debug_on_bad_intlv_mode(ctx); | |
152 | return ~0ULL; | |
153 | } | |
154 | } | |
155 | ||
156 | static u16 get_coh_st_id_df2(struct addr_ctx *ctx) | |
157 | { | |
158 | u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets); | |
159 | u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies); | |
160 | u8 num_intlv_bits; | |
161 | u16 coh_st_id, mask; | |
162 | ||
163 | coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); | |
164 | ||
165 | /* Channel interleave bits */ | |
166 | num_intlv_bits = order_base_2(ctx->map.num_intlv_chan); | |
167 | mask = GENMASK(num_intlv_bits - 1, 0); | |
168 | coh_st_id &= mask; | |
169 | ||
170 | /* Die interleave bits */ | |
171 | if (num_die_intlv_bits) { | |
172 | u16 die_bits; | |
173 | ||
174 | mask = GENMASK(num_die_intlv_bits - 1, 0); | |
175 | die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask; | |
176 | die_bits >>= df_cfg.die_id_shift; | |
177 | ||
178 | coh_st_id |= (die_bits & mask) << num_intlv_bits; | |
179 | num_intlv_bits += num_die_intlv_bits; | |
180 | } | |
181 | ||
182 | /* Socket interleave bits */ | |
183 | if (num_socket_intlv_bits) { | |
184 | u16 socket_bits; | |
185 | ||
186 | mask = GENMASK(num_socket_intlv_bits - 1, 0); | |
187 | socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; | |
188 | socket_bits >>= df_cfg.socket_id_shift; | |
189 | ||
190 | coh_st_id |= (socket_bits & mask) << num_intlv_bits; | |
191 | } | |
192 | ||
193 | return coh_st_id; | |
194 | } | |
195 | ||
196 | static u16 get_coh_st_id_df4(struct addr_ctx *ctx) | |
197 | { | |
198 | /* | |
199 | * Start with the original component mask and the number of interleave | |
200 | * bits for the channels in this map. | |
201 | */ | |
202 | u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); | |
203 | u16 mask = df_cfg.component_id_mask; | |
204 | ||
205 | u16 socket_bits; | |
206 | ||
207 | /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */ | |
208 | u16 coh_st_id = ctx->coh_st_fabric_id & mask; | |
209 | ||
210 | /* | |
211 | * Subtract the "base" Destination Fabric ID. | |
212 | * This accounts for systems with disabled Coherent Stations. | |
213 | */ | |
214 | coh_st_id -= get_dst_fabric_id(ctx) & mask; | |
215 | ||
216 | /* | |
217 | * Generate and use a new mask based on the number of bits | |
218 | * needed for channel interleaving in this map. | |
219 | */ | |
220 | mask = GENMASK(num_intlv_bits - 1, 0); | |
221 | coh_st_id &= mask; | |
222 | ||
223 | /* Done if socket interleaving is not enabled. */ | |
224 | if (ctx->map.num_intlv_sockets <= 1) | |
225 | return coh_st_id; | |
226 | ||
227 | /* | |
228 | * Figure out how many bits are needed for the number of | |
229 | * interleaved sockets. And shift the derived Coherent Station ID to account | |
230 | * for these. | |
231 | */ | |
232 | num_intlv_bits = ilog2(ctx->map.num_intlv_sockets); | |
233 | coh_st_id <<= num_intlv_bits; | |
234 | ||
235 | /* Generate a new mask for the socket interleaving bits. */ | |
236 | mask = GENMASK(num_intlv_bits - 1, 0); | |
237 | ||
238 | /* Get the socket interleave bits from the original Coherent Station Fabric ID. */ | |
239 | socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift; | |
240 | ||
241 | /* Apply the appropriate socket bits to the derived Coherent Station ID. */ | |
242 | coh_st_id |= socket_bits & mask; | |
243 | ||
244 | return coh_st_id; | |
245 | } | |
246 | ||
453f0ae7 M |
247 | /* |
248 | * MI300 hash has: | |
249 | * (C)hannel[3:0] = coh_st_id[3:0] | |
250 | * (S)tack[0] = coh_st_id[4] | |
251 | * (D)ie[1:0] = coh_st_id[6:5] | |
252 | * | |
253 | * Hashed coh_st_id is swizzled so that Stack bit is at the end. | |
254 | * coh_st_id = SDDCCCC | |
255 | */ | |
256 | static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) | |
257 | { | |
258 | u8 channel_bits, die_bits, stack_bit; | |
259 | u16 die_id; | |
260 | ||
261 | /* Subtract the "base" Destination Fabric ID. */ | |
262 | ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); | |
263 | ||
264 | die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; | |
265 | ||
266 | channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); | |
267 | stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; | |
268 | die_bits = die_id << 4; | |
269 | ||
270 | return stack_bit | die_bits | channel_bits; | |
271 | } | |
272 | ||
3f317499 YG |
273 | /* |
274 | * Derive the correct Coherent Station ID that represents the interleave bits | |
275 | * used within the system physical address. This accounts for the | |
276 | * interleave mode, number of interleaved channels/dies/sockets, and | |
277 | * other system/mode-specific bit swizzling. | |
278 | * | |
279 | * Returns: Coherent Station ID on success. | |
280 | * All bits set on error. | |
281 | */ | |
282 | static u16 calculate_coh_st_id(struct addr_ctx *ctx) | |
283 | { | |
284 | switch (ctx->map.intlv_mode) { | |
285 | case NOHASH_2CHAN: | |
286 | case NOHASH_4CHAN: | |
287 | case NOHASH_8CHAN: | |
288 | case NOHASH_16CHAN: | |
289 | case NOHASH_32CHAN: | |
290 | case DF3_COD4_2CHAN_HASH: | |
291 | case DF3_COD2_4CHAN_HASH: | |
292 | case DF3_COD1_8CHAN_HASH: | |
293 | case DF2_2CHAN_HASH: | |
294 | return get_coh_st_id_df2(ctx); | |
295 | ||
296 | case DF4_NPS4_2CHAN_HASH: | |
297 | case DF4_NPS2_4CHAN_HASH: | |
298 | case DF4_NPS1_8CHAN_HASH: | |
299 | case DF4p5_NPS4_2CHAN_1K_HASH: | |
300 | case DF4p5_NPS4_2CHAN_2K_HASH: | |
301 | case DF4p5_NPS2_4CHAN_2K_HASH: | |
302 | case DF4p5_NPS1_8CHAN_2K_HASH: | |
303 | case DF4p5_NPS1_16CHAN_2K_HASH: | |
304 | return get_coh_st_id_df4(ctx); | |
305 | ||
453f0ae7 M |
306 | case MI3_HASH_8CHAN: |
307 | case MI3_HASH_16CHAN: | |
308 | case MI3_HASH_32CHAN: | |
309 | return get_coh_st_id_mi300(ctx); | |
310 | ||
3f317499 YG |
311 | /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ |
312 | case DF4p5_NPS2_4CHAN_1K_HASH: | |
313 | case DF4p5_NPS1_8CHAN_1K_HASH: | |
314 | case DF4p5_NPS1_16CHAN_1K_HASH: | |
315 | return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); | |
316 | ||
317 | default: | |
318 | atl_debug_on_bad_intlv_mode(ctx); | |
319 | return ~0; | |
320 | } | |
321 | } | |
322 | ||
323 | static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) | |
324 | { | |
325 | return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos); | |
326 | } | |
327 | ||
328 | static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) | |
329 | { | |
330 | /* Insert coh_st_id[0] at the interleave bit. */ | |
331 | denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos; | |
332 | ||
333 | /* Insert coh_st_id[2:1] at bit 12. */ | |
334 | denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11; | |
335 | ||
336 | return denorm_addr; | |
337 | } | |
338 | ||
339 | static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) | |
340 | { | |
341 | /* Insert coh_st_id[1:0] at bit 8. */ | |
342 | denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8; | |
343 | ||
344 | /* | |
345 | * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3. | |
346 | * Grab both because bit 3 will be clear if unused. | |
347 | */ | |
348 | denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10; | |
349 | ||
350 | return denorm_addr; | |
351 | } | |
352 | ||
353 | static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) | |
354 | { | |
355 | switch (ctx->map.intlv_mode) { | |
356 | case NOHASH_2CHAN: | |
357 | case NOHASH_4CHAN: | |
358 | case NOHASH_8CHAN: | |
359 | case NOHASH_16CHAN: | |
360 | case NOHASH_32CHAN: | |
453f0ae7 M |
361 | case MI3_HASH_8CHAN: |
362 | case MI3_HASH_16CHAN: | |
363 | case MI3_HASH_32CHAN: | |
3f317499 YG |
364 | case DF2_2CHAN_HASH: |
365 | return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); | |
366 | ||
367 | case DF3_COD4_2CHAN_HASH: | |
368 | case DF3_COD2_4CHAN_HASH: | |
369 | case DF3_COD1_8CHAN_HASH: | |
370 | case DF4_NPS4_2CHAN_HASH: | |
371 | case DF4_NPS2_4CHAN_HASH: | |
372 | case DF4_NPS1_8CHAN_HASH: | |
373 | case DF4p5_NPS4_2CHAN_1K_HASH: | |
374 | case DF4p5_NPS4_2CHAN_2K_HASH: | |
375 | case DF4p5_NPS2_4CHAN_2K_HASH: | |
376 | case DF4p5_NPS1_8CHAN_2K_HASH: | |
377 | case DF4p5_NPS1_16CHAN_2K_HASH: | |
378 | return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id); | |
379 | ||
380 | case DF4p5_NPS2_4CHAN_1K_HASH: | |
381 | case DF4p5_NPS1_8CHAN_1K_HASH: | |
382 | case DF4p5_NPS1_16CHAN_1K_HASH: | |
383 | return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id); | |
384 | ||
385 | default: | |
386 | atl_debug_on_bad_intlv_mode(ctx); | |
387 | return ~0ULL; | |
388 | } | |
389 | } | |
390 | ||
453f0ae7 M |
391 | /* |
392 | * MI300 systems have a fixed, hardware-defined physical-to-logical | |
393 | * Coherent Station mapping. The Remap registers are not used. | |
394 | */ | |
395 | static const u16 phy_to_log_coh_st_map_mi300[] = { | |
396 | 12, 13, 14, 15, | |
397 | 8, 9, 10, 11, | |
398 | 4, 5, 6, 7, | |
399 | 0, 1, 2, 3, | |
400 | 28, 29, 30, 31, | |
401 | 24, 25, 26, 27, | |
402 | 20, 21, 22, 23, | |
403 | 16, 17, 18, 19, | |
404 | }; | |
405 | ||
406 | static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) | |
407 | { | |
408 | if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { | |
409 | atl_debug(ctx, "Instance ID out of range"); | |
410 | return ~0; | |
411 | } | |
412 | ||
413 | return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); | |
414 | } | |
415 | ||
3f317499 YG |
416 | static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) |
417 | { | |
418 | u16 component_id, log_fabric_id; | |
419 | ||
420 | /* Start with the physical COH_ST Fabric ID. */ | |
421 | u16 phys_fabric_id = ctx->coh_st_fabric_id; | |
422 | ||
453f0ae7 M |
423 | if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) |
424 | return get_logical_coh_st_fabric_id_mi300(ctx); | |
425 | ||
3f317499 YG |
426 | /* Skip logical ID lookup if remapping is disabled. */ |
427 | if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && | |
428 | ctx->map.intlv_mode != DF3_6CHAN) | |
429 | return phys_fabric_id; | |
430 | ||
431 | /* Mask off the Node ID bits to get the "local" Component ID. */ | |
432 | component_id = phys_fabric_id & df_cfg.component_id_mask; | |
433 | ||
434 | /* | |
435 | * Search the list of logical Component IDs for the one that | |
436 | * matches this physical Component ID. | |
437 | */ | |
438 | for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) { | |
439 | if (ctx->map.remap_array[log_fabric_id] == component_id) | |
440 | break; | |
441 | } | |
442 | ||
443 | if (log_fabric_id == MAX_COH_ST_CHANNELS) | |
444 | atl_debug(ctx, "COH_ST remap entry not found for 0x%x", | |
445 | log_fabric_id); | |
446 | ||
447 | /* Get the Node ID bits from the physical and apply to the logical. */ | |
448 | return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; | |
449 | } | |
450 | ||
451 | static int denorm_addr_common(struct addr_ctx *ctx) | |
452 | { | |
453 | u64 denorm_addr; | |
454 | u16 coh_st_id; | |
455 | ||
456 | /* | |
457 | * Convert the original physical COH_ST Fabric ID to a logical value. | |
458 | * This is required for non-power-of-two and other interleaving modes. | |
459 | */ | |
460 | ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx); | |
461 | ||
462 | denorm_addr = make_space_for_coh_st_id(ctx); | |
463 | coh_st_id = calculate_coh_st_id(ctx); | |
464 | ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id); | |
465 | return 0; | |
466 | } | |
467 | ||
468 | static int denorm_addr_df3_6chan(struct addr_ctx *ctx) | |
469 | { | |
470 | u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask; | |
471 | u8 total_intlv_bits = ctx->map.total_intlv_bits; | |
472 | u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos; | |
473 | u64 msb_intlv_bits, temp_addr_a, temp_addr_b; | |
474 | u8 np2_bits = ctx->map.np2_bits; | |
475 | ||
476 | if (ctx->map.intlv_mode != DF3_6CHAN) | |
477 | return -EINVAL; | |
478 | ||
479 | /* | |
480 | * 'np2_bits' holds the number of bits needed to cover the | |
481 | * amount of memory (rounded up) in this map using 64K chunks. | |
482 | * | |
483 | * Example: | |
484 | * Total memory in map: 6GB | |
485 | * Rounded up to next power-of-2: 8GB | |
486 | * Number of 64K chunks: 0x20000 | |
487 | * np2_bits = log2(# of chunks): 17 | |
488 | * | |
489 | * Get the two most-significant interleave bits from the | |
490 | * input address based on the following: | |
491 | * | |
492 | * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits] | |
493 | */ | |
494 | low_bit = 14 + np2_bits - total_intlv_bits; | |
495 | msb_intlv_bits = ctx->ret_addr >> low_bit; | |
496 | msb_intlv_bits &= 0x3; | |
497 | ||
498 | /* | |
499 | * If MSB are 11b, then logical COH_ST ID is 6 or 7. | |
500 | * Need to adjust based on the mod3 result. | |
501 | */ | |
502 | if (msb_intlv_bits == 3) { | |
503 | u8 addr_mod, phys_addr_msb, msb_coh_st_id; | |
504 | ||
505 | /* Get the remaining interleave bits from the input address. */ | |
506 | temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr; | |
507 | temp_addr_b >>= intlv_bit; | |
508 | ||
509 | /* Calculate the logical COH_ST offset based on mod3. */ | |
510 | addr_mod = temp_addr_b % 3; | |
511 | ||
512 | /* Get COH_ST ID bits [2:1]. */ | |
513 | msb_coh_st_id = (coh_st_id >> 1) & 0x3; | |
514 | ||
515 | /* Get the bit that starts the physical address bits. */ | |
516 | phys_addr_msb = (intlv_bit + np2_bits + 1); | |
517 | phys_addr_msb &= BIT(0); | |
518 | phys_addr_msb++; | |
519 | phys_addr_msb *= 3 - addr_mod + msb_coh_st_id; | |
520 | phys_addr_msb %= 3; | |
521 | ||
522 | /* Move the physical address MSB to the correct place. */ | |
523 | temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit); | |
524 | ||
525 | /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */ | |
526 | coh_st_id &= BIT(0); | |
527 | coh_st_id |= GENMASK(2, 1); | |
528 | } else { | |
529 | temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr; | |
530 | temp_addr_b >>= intlv_bit; | |
531 | } | |
532 | ||
533 | temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr; | |
534 | temp_addr_b <<= intlv_bit + total_intlv_bits; | |
535 | ||
536 | ctx->ret_addr = temp_addr_a | temp_addr_b; | |
537 | ctx->ret_addr |= coh_st_id << intlv_bit; | |
538 | return 0; | |
539 | } | |
540 | ||
541 | static int denorm_addr_df4_np2(struct addr_ctx *ctx) | |
542 | { | |
543 | bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; | |
544 | u16 group, group_offset, log_coh_st_offset; | |
545 | unsigned int mod_value, shift_value; | |
546 | u16 mask = df_cfg.component_id_mask; | |
547 | u64 temp_addr_a, temp_addr_b; | |
548 | u8 hash_pa8, hashed_bit; | |
549 | ||
550 | switch (ctx->map.intlv_mode) { | |
551 | case DF4_NPS4_3CHAN_HASH: | |
552 | mod_value = 3; | |
553 | shift_value = 13; | |
554 | break; | |
555 | case DF4_NPS2_6CHAN_HASH: | |
556 | mod_value = 3; | |
557 | shift_value = 12; | |
558 | break; | |
559 | case DF4_NPS1_12CHAN_HASH: | |
560 | mod_value = 3; | |
561 | shift_value = 11; | |
562 | break; | |
563 | case DF4_NPS2_5CHAN_HASH: | |
564 | mod_value = 5; | |
565 | shift_value = 13; | |
566 | break; | |
567 | case DF4_NPS1_10CHAN_HASH: | |
568 | mod_value = 5; | |
569 | shift_value = 12; | |
570 | break; | |
571 | default: | |
572 | atl_debug_on_bad_intlv_mode(ctx); | |
573 | return -EINVAL; | |
574 | }; | |
575 | ||
576 | if (ctx->map.num_intlv_sockets == 1) { | |
577 | hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; | |
578 | temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); | |
579 | } else { | |
580 | hash_pa8 = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask); | |
581 | hash_pa8 >>= df_cfg.socket_id_shift; | |
582 | temp_addr_a = ctx->ret_addr; | |
583 | } | |
584 | ||
585 | /* Make a gap for the real bit [8]. */ | |
586 | temp_addr_a = expand_bits(8, 1, temp_addr_a); | |
587 | ||
588 | /* Make an additional gap for bits [13:12], as appropriate.*/ | |
589 | if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH || | |
590 | ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) { | |
591 | temp_addr_a = expand_bits(13, 1, temp_addr_a); | |
592 | } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) { | |
593 | temp_addr_a = expand_bits(12, 2, temp_addr_a); | |
594 | } | |
595 | ||
596 | /* Keep bits [13:0]. */ | |
597 | temp_addr_a &= GENMASK_ULL(13, 0); | |
598 | ||
599 | /* Get the appropriate high bits. */ | |
600 | shift_value += 1 - ilog2(ctx->map.num_intlv_sockets); | |
601 | temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr; | |
602 | temp_addr_b >>= shift_value; | |
603 | temp_addr_b *= mod_value; | |
604 | ||
605 | /* | |
606 | * Coherent Stations are divided into groups. | |
607 | * | |
608 | * Multiples of 3 (mod3) are divided into quadrants. | |
609 | * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8] | |
610 | * [3, 4, 5] [9, 10, 11] | |
611 | * | |
612 | * Multiples of 5 (mod5) are divided into sides. | |
613 | * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] | |
614 | */ | |
615 | ||
616 | /* | |
617 | * Calculate the logical offset for the COH_ST within its DRAM Address map. | |
618 | * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then | |
619 | * log_coh_st_offset = 8 - 5 = 3 | |
620 | */ | |
621 | log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask); | |
622 | ||
623 | /* | |
624 | * Figure out the group number. | |
625 | * | |
626 | * Following above example, | |
627 | * log_coh_st_offset = 3 | |
628 | * mod_value = 5 | |
629 | * group = 3 / 5 = 0 | |
630 | */ | |
631 | group = log_coh_st_offset / mod_value; | |
632 | ||
633 | /* | |
634 | * Figure out the offset within the group. | |
635 | * | |
636 | * Following above example, | |
637 | * log_coh_st_offset = 3 | |
638 | * mod_value = 5 | |
639 | * group_offset = 3 % 5 = 3 | |
640 | */ | |
641 | group_offset = log_coh_st_offset % mod_value; | |
642 | ||
643 | /* Adjust group_offset if the hashed bit [8] is set. */ | |
644 | if (hash_pa8) { | |
645 | if (!group_offset) | |
646 | group_offset = mod_value - 1; | |
647 | else | |
648 | group_offset--; | |
649 | } | |
650 | ||
651 | /* Add in the group offset to the high bits. */ | |
652 | temp_addr_b += group_offset; | |
653 | ||
654 | /* Shift the high bits to the proper starting position. */ | |
655 | temp_addr_b <<= 14; | |
656 | ||
657 | /* Combine the high and low bits together. */ | |
658 | ctx->ret_addr = temp_addr_a | temp_addr_b; | |
659 | ||
660 | /* Account for hashing here instead of in dehash_address(). */ | |
661 | hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); | |
662 | hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); | |
663 | hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); | |
664 | ||
665 | hashed_bit = !!hash_pa8; | |
666 | hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); | |
667 | hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; | |
668 | hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; | |
669 | hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; | |
670 | ||
671 | ctx->ret_addr |= hashed_bit << 8; | |
672 | ||
673 | /* Done for 3 and 5 channel. */ | |
674 | if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH || | |
675 | ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH) | |
676 | return 0; | |
677 | ||
678 | /* Select the proper 'group' bit to use for Bit 13. */ | |
679 | if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) | |
680 | hashed_bit = !!(group & BIT(1)); | |
681 | else | |
682 | hashed_bit = group & BIT(0); | |
683 | ||
684 | hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; | |
685 | hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; | |
686 | hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; | |
687 | ||
688 | ctx->ret_addr |= hashed_bit << 13; | |
689 | ||
690 | /* Done for 6 and 10 channel. */ | |
691 | if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH) | |
692 | return 0; | |
693 | ||
694 | hashed_bit = group & BIT(0); | |
695 | hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; | |
696 | hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; | |
697 | hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; | |
698 | ||
699 | ctx->ret_addr |= hashed_bit << 12; | |
700 | return 0; | |
701 | } | |
702 | ||
703 | int denormalize_address(struct addr_ctx *ctx) | |
704 | { | |
705 | switch (ctx->map.intlv_mode) { | |
706 | case NONE: | |
707 | return 0; | |
708 | case DF4_NPS4_3CHAN_HASH: | |
709 | case DF4_NPS2_6CHAN_HASH: | |
710 | case DF4_NPS1_12CHAN_HASH: | |
711 | case DF4_NPS2_5CHAN_HASH: | |
712 | case DF4_NPS1_10CHAN_HASH: | |
713 | return denorm_addr_df4_np2(ctx); | |
714 | case DF3_6CHAN: | |
715 | return denorm_addr_df3_6chan(ctx); | |
716 | default: | |
717 | return denorm_addr_common(ctx); | |
718 | } | |
719 | } |