| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | /* |
| 3 | * Common code shared by the skx_edac driver and the Intel 10nm server EDAC driver. |
| 4 | * Originally split out from the skx_edac driver. |
| 5 | * |
| 6 | * Copyright (c) 2018, Intel Corporation. |
| 7 | */ |
| 8 | |
| 9 | #ifndef _SKX_COMM_EDAC_H |
| 10 | #define _SKX_COMM_EDAC_H |
| 11 | |
| 12 | #include <linux/bits.h> |
| 13 | #include <asm/mce.h> |
| 14 | |
| 15 | #define MSG_SIZE 1024 /* NOTE(review): presumably the size in bytes of the driver's message buffers -- not used in this header, confirm against callers */ |
| 16 | |
| 17 | /* |
| 18 | * Debug macros: forward to edac_printk() / edac_mc_chipset_printk(), inserting the fixed string "skx" between the caller's level and format arguments. |
| 19 | */ |
| 20 | #define skx_printk(level, fmt, arg...) \ |
| 21 | edac_printk(level, "skx", fmt, ##arg) |
| 22 | |
| 23 | #define skx_mc_printk(mci, level, fmt, arg...) \ |
| 24 | edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) |
| 25 | |
| 26 | /* |
| 27 | * Get a bit field at register value <v>, from bit <lo> to bit <hi>: <v> is masked with GENMASK_ULL(hi, lo) and shifted right by <lo>, so bit <lo> ends up at bit 0. <lo> appears twice in the expansion, so avoid arguments with side effects. |
| 28 | */ |
| 29 | #define GET_BITFIELD(v, lo, hi) \ |
| 30 | (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) |
| 31 | |
| 32 | #define SKX_NUM_IMC 2 /* Memory controllers per socket */ |
| 33 | #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ |
| 34 | #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ |
| 35 | |
| 36 | #define I10NM_NUM_DDR_IMC 12 /* DDR memory controllers (NOTE(review): presumably per socket, like SKX_NUM_IMC -- confirm) */ |
| 37 | #define I10NM_NUM_DDR_CHANNELS 2 /* Channels per DDR memory controller */ |
| 38 | #define I10NM_NUM_DDR_DIMMS 2 /* DIMMs per DDR channel */ |
| 39 | |
| 40 | #define I10NM_NUM_HBM_IMC 16 /* HBM memory controllers (NOTE(review): presumably per socket -- confirm) */ |
| 41 | #define I10NM_NUM_HBM_CHANNELS 2 /* Channels per HBM memory controller */ |
| 42 | #define I10NM_NUM_HBM_DIMMS 1 /* DIMMs per HBM channel */ |
| 43 | |
| 44 | #define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) /* DDR and HBM controllers combined */ |
| 45 | #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) /* larger of the DDR and HBM values */ |
| 46 | #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) /* larger of the DDR and HBM values */ |
| 47 | |
| 48 | #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) /* larger of SKX and I10NM; sizes mc_mapping[] and imc[] in struct skx_dev */ |
| 49 | #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) /* larger of SKX and I10NM; sizes chan[] in struct skx_imc */ |
| 50 | #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) /* larger of SKX and I10NM; sizes dimms[] in struct skx_channel */ |
| 51 | |
| 52 | #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) /* bit 15 of register value <r> */ |
| 53 | #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) /* bit <i> of register value <r> */ |
| 54 | |
| 55 | #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) /* 4-bit field at bits 62:59 of <m> (NOTE(review): presumably an MCi_MISC value -- confirm) */ |
| 56 | #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ |
| 57 | |
| 58 | /* |
| 59 | * According to Intel Architecture spec vol 3B, |
| 60 | * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" |
| 61 | * memory errors should fit one of these masks: |
| 62 | * 000f 0000 1mmm cccc (binary) |
| 63 | * 000f 0010 1mmm cccc (binary) [RAM used as cache] |
| 64 | * where: |
| 65 | * f = Correction Report Filtering Bit. If 1, subsequent errors |
| 66 | * won't be shown |
| 67 | * mmm = error type |
| 68 | * cccc = channel |
| 69 | */ |
| 70 | #define MCACOD_MEM_ERR_MASK 0xef80 /* keeps bits 15:13 and 11:7; drops the f bit (bit 12) and the mmm/cccc bits (6:0) */ |
| 71 | /* |
| 72 | * Errors from either the memory of the 1-level memory system or the |
| 73 | * 2nd level memory (the slow "far" memory) of the 2-level memory system. |
| 74 | */ |
| 75 | #define MCACOD_MEM_CTL_ERR 0x80 /* first pattern above with the f, mmm and cccc bits cleared */ |
| 76 | /* |
| 77 | * Errors from the 1st level memory (the fast "near" memory as cache) |
| 78 | * of the 2-level memory system. |
| 79 | */ |
| 80 | #define MCACOD_EXT_MEM_ERR 0x280 /* second pattern above with the f, mmm and cccc bits cleared */ |
| 81 | |
| 82 | /* Max RRL register sets per {,sub-,pseudo-}channel; sizes modes[] and offsets[] in struct reg_rrl and rrl_ctl[][] in struct skx_channel. NOTE(review): RRL presumably stands for the retry read error log -- confirm. */ |
| 83 | #define NUM_RRL_SET 4 |
| 84 | /* Max RRL registers per set; sizes the second dimension of offsets[] and widths[] in struct reg_rrl. */ |
| 85 | #define NUM_RRL_REG 6 |
| 86 | /* Max correctable error count registers; sizes cecnt_offsets[] and cecnt_widths[] in struct reg_rrl. */ |
| 87 | #define NUM_CECNT_REG 8 |
| 88 | |
| 89 | /* Modes of RRL register set. Enumerators are implicitly numbered 0..3; one mode is stored per set in reg_rrl.modes[]. */ |
| 90 | enum rrl_mode { |
| 91 | /* Last read error from patrol scrub. */ |
| 92 | LRE_SCRUB, |
| 93 | /* Last read error from demand. */ |
| 94 | LRE_DEMAND, |
| 95 | /* First read error from patrol scrub. */ |
| 96 | FRE_SCRUB, |
| 97 | /* First read error from demand. */ |
| 98 | FRE_DEMAND, |
| 99 | }; |
| 100 | |
| 101 | /* RRL registers per {,sub-,pseudo-}channel: register offsets, widths and control-bit masks, plus the correctable error count (CORRERRCNT) registers. */ |
| 102 | struct reg_rrl { |
| 103 | /* RRL register parts. */ |
| 104 | int set_num, reg_num; /* NOTE(review): presumably the number of valid sets and of registers per set, bounded by NUM_RRL_SET / NUM_RRL_REG -- confirm */ |
| 105 | enum rrl_mode modes[NUM_RRL_SET]; /* one mode per register set */ |
| 106 | u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; /* indexed [set][register]; NOTE(review): presumably register offsets within the channel's register space -- confirm */ |
| 107 | /* RRL register widths in bytes per set. */ |
| 108 | u8 widths[NUM_RRL_REG]; |
| 109 | /* RRL control bits of the first register per set. */ |
| 110 | u32 v_mask; |
| 111 | u32 uc_mask; |
| 112 | u32 over_mask; |
| 113 | u32 en_patspr_mask; |
| 114 | u32 noover_mask; |
| 115 | u32 en_mask; |
| 116 | |
| 117 | /* CORRERRCNT register parts. */ |
| 118 | int cecnt_num; /* NOTE(review): presumably the number of valid entries in the two arrays below, bounded by NUM_CECNT_REG -- confirm */ |
| 119 | u32 cecnt_offsets[NUM_CECNT_REG]; |
| 120 | u8 cecnt_widths[NUM_CECNT_REG]; |
| 121 | }; |
| 122 | |
| 123 | /* |
| 124 | * Each cpu socket contains some pci devices that provide global |
| 125 | * information, and also some that are local to each of the |
| 126 | * memory controllers on the die (two on SKX; imc[] has NUM_IMC slots). |
| 127 | */ |
| 128 | struct skx_dev { |
| 129 | struct list_head list; /* list linkage; NOTE(review): presumably the list handed back by skx_get_all_bus_mappings() -- confirm */ |
| 130 | u8 bus[4]; /* NOTE(review): presumably the PCI bus numbers used by this socket -- confirm */ |
| 131 | int seg; /* NOTE(review): presumably the PCI segment (domain) number -- confirm */ |
| 132 | struct pci_dev *sad_all; |
| 133 | struct pci_dev *util_all; |
| 134 | struct pci_dev *uracu; /* for i10nm CPU */ |
| 135 | struct pci_dev *pcu_cr3; /* for HBM memory detection */ |
| 136 | u32 mcroute; |
| 137 | /* |
| 138 | * Some server BIOS may hide certain memory controllers, and the |
| 139 | * EDAC driver skips those hidden memory controllers. However, the |
| 140 | * ADXL still decodes memory error address using physical memory |
| 141 | * controller indices. The mapping table is used to convert the |
| 142 | * physical indices (reported by ADXL) to the logical indices |
| 143 | * (used by the EDAC driver) of present memory controllers during the |
| 144 | * error handling process. |
| 145 | */ |
| 146 | u8 mc_mapping[NUM_IMC]; |
| 147 | struct skx_imc { |
| 148 | struct mem_ctl_info *mci; |
| 149 | struct pci_dev *mdev; /* for i10nm CPU */ |
| 150 | void __iomem *mbase; /* for i10nm CPU */ |
| 151 | int chan_mmio_sz; /* for i10nm CPU */ |
| 152 | int num_channels; /* channels per memory controller */ |
| 153 | int num_dimms; /* dimms per channel */ |
| 154 | bool hbm_mc; /* NOTE(review): presumably true when this is an HBM memory controller -- confirm */ |
| 155 | u8 mc; /* system wide mc# */ |
| 156 | u8 lmc; /* socket relative mc# */ |
| 157 | u8 src_id; |
| 158 | struct skx_channel { |
| 159 | struct pci_dev *cdev; |
| 160 | struct pci_dev *edev; |
| 161 | /* |
| 162 | * Two groups of RRL control registers per channel to save default RRL |
| 163 | * settings of two {sub-,pseudo-}channels in Linux RRL control mode. |
| 164 | */ |
| 165 | u32 rrl_ctl[2][NUM_RRL_SET]; |
| 166 | struct skx_dimm { |
| 167 | u8 close_pg; |
| 168 | u8 bank_xor_enable; |
| 169 | u8 fine_grain_bank; |
| 170 | u8 rowbits; |
| 171 | u8 colbits; |
| 172 | } dimms[NUM_DIMMS]; /* per-DIMM state: NUM_DIMMS slots in each channel */ |
| 173 | } chan[NUM_CHANNELS]; /* per-channel state: NUM_CHANNELS slots in each memory controller */ |
| 174 | } imc[NUM_IMC]; /* per-memory-controller state: NUM_IMC slots in each skx_dev */ |
| 175 | }; |
| 176 | |
| 177 | struct skx_pvt { /* Holds a single pointer to a struct skx_imc. NOTE(review): presumably the private data attached to each mem_ctl_info -- confirm */ |
| 178 | struct skx_imc *imc; |
| 179 | }; |
| 180 | |
| 181 | enum type { /* Platform type stored in res_config.type; enumerators are implicitly numbered 0..3 */ |
| 182 | SKX, |
| 183 | I10NM, |
| 184 | SPR, |
| 185 | GNR |
| 186 | }; |
| 187 | |
| 188 | enum { /* Component indices; NOTE(review): presumably positions of the address components reported by ADXL, with NM = near memory -- confirm */ |
| 189 | INDEX_SOCKET, /* 0 */ |
| 190 | INDEX_MEMCTRL, /* 1 */ |
| 191 | INDEX_CHANNEL, /* 2 */ |
| 192 | INDEX_DIMM, /* 3 */ |
| 193 | INDEX_CS, /* 4 */ |
| 194 | INDEX_NM_FIRST, /* 5: first of the NM_* indices */ |
| 195 | INDEX_NM_MEMCTRL = INDEX_NM_FIRST, /* 5: alias of INDEX_NM_FIRST */ |
| 196 | INDEX_NM_CHANNEL, /* 6 */ |
| 197 | INDEX_NM_DIMM, /* 7 */ |
| 198 | INDEX_NM_CS, /* 8 */ |
| 199 | INDEX_MAX /* 9: number of distinct indices */ |
| 200 | }; |
| 201 | |
| 202 | enum error_source { /* Where an error came from; enumerators are implicitly numbered 0..3 */ |
| 203 | ERR_SRC_1LM, /* NOTE(review): presumably memory of a 1-level memory system -- confirm */ |
| 204 | ERR_SRC_2LM_NM, /* NOTE(review): presumably near memory of a 2-level memory system -- confirm */ |
| 205 | ERR_SRC_2LM_FM, /* NOTE(review): presumably far memory of a 2-level memory system -- confirm */ |
| 206 | ERR_SRC_NOT_MEMORY, /* NOTE(review): presumably an error that is not a memory error -- confirm */ |
| 207 | }; |
| 208 | |
| 209 | #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) /* single-bit mask at bit position INDEX_NM_MEMCTRL (5) */ |
| 210 | #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) /* single-bit mask at bit position INDEX_NM_CHANNEL (6) */ |
| 211 | #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) /* single-bit mask at bit position INDEX_NM_DIMM (7) */ |
| 212 | #define BIT_NM_CS BIT_ULL(INDEX_NM_CS) /* single-bit mask at bit position INDEX_NM_CS (8) */ |
| 213 | |
| 214 | struct decoded_addr { /* Address decode record passed to the skx_decode_f and skx_show_retry_log_f callbacks */ |
| 215 | struct mce *mce; /* NOTE(review): presumably the machine check record being decoded -- confirm */ |
| 216 | struct skx_dev *dev; /* NOTE(review): presumably the socket device the address belongs to -- confirm */ |
| 217 | u64 addr; /* NOTE(review): presumably the system address being decoded -- confirm */ |
| 218 | int socket; |
| 219 | int imc; |
| 220 | int channel; |
| 221 | u64 chan_addr; |
| 222 | int sktways; |
| 223 | int chanways; |
| 224 | int dimm; |
| 225 | int cs; |
| 226 | int rank; |
| 227 | int channel_rank; |
| 228 | u64 rank_address; |
| 229 | int row; |
| 230 | int column; |
| 231 | int bank_address; |
| 232 | int bank_group; |
| 233 | bool decoded_by_adxl; /* NOTE(review): presumably set when ADXL rather than the driver's own decoder produced the fields above -- confirm */ |
| 234 | }; |
| 235 | |
| 236 | struct pci_bdf { /* Bus/device/function triple packed into bit-fields: 8 + 5 + 3 = 16 bits in total */ |
| 237 | u32 bus : 8; /* bus number, 0..255 */ |
| 238 | u32 dev : 5; /* device number, 0..31 */ |
| 239 | u32 fun : 3; /* function number, 0..7 */ |
| 240 | }; |
| 241 | |
| 242 | struct res_config { /* Per-platform configuration; taken by skx_set_res_cfg(), skx_get_all_bus_mappings(), skx_get_dimm_info(), skx_register_mci() and the get_dimm_config_f callback */ |
| 243 | enum type type; /* platform type, one of enum type */ |
| 244 | /* Configuration agent device ID */ |
| 245 | unsigned int decs_did; |
| 246 | /* Default bus number configuration register offset */ |
| 247 | int busno_cfg_offset; |
| 248 | /* DDR memory controllers per socket */ |
| 249 | int ddr_imc_num; |
| 250 | /* DDR channels per DDR memory controller */ |
| 251 | int ddr_chan_num; |
| 252 | /* DDR DIMMs per DDR memory channel */ |
| 253 | int ddr_dimm_num; |
| 254 | /* Per DDR channel memory-mapped I/O size */ |
| 255 | int ddr_chan_mmio_sz; |
| 256 | /* HBM memory controllers per socket */ |
| 257 | int hbm_imc_num; |
| 258 | /* HBM channels per HBM memory controller */ |
| 259 | int hbm_chan_num; |
| 260 | /* HBM DIMMs per HBM memory channel */ |
| 261 | int hbm_dimm_num; |
| 262 | /* Per HBM channel memory-mapped I/O size */ |
| 263 | int hbm_chan_mmio_sz; |
| 264 | bool support_ddr5; /* NOTE(review): presumably true when the platform supports DDR5 -- confirm */ |
| 265 | /* SAD device BDF */ |
| 266 | struct pci_bdf sad_all_bdf; |
| 267 | /* PCU device BDF */ |
| 268 | struct pci_bdf pcu_cr3_bdf; |
| 269 | /* UTIL device BDF */ |
| 270 | struct pci_bdf util_all_bdf; |
| 271 | /* URACU device BDF */ |
| 272 | struct pci_bdf uracu_bdf; |
| 273 | /* DDR mdev device BDF */ |
| 274 | struct pci_bdf ddr_mdev_bdf; |
| 275 | /* HBM mdev device BDF */ |
| 276 | struct pci_bdf hbm_mdev_bdf; |
| 277 | int sad_all_offset; /* NOTE(review): presumably a register offset within the SAD device -- confirm */ |
| 278 | /* RRL register sets per DDR channel */ |
| 279 | struct reg_rrl *reg_rrl_ddr; |
| 280 | /* RRL register sets per HBM channel */ |
| 281 | struct reg_rrl *reg_rrl_hbm[2]; /* two entries; NOTE(review): presumably one per HBM pseudo-channel -- confirm */ |
| 282 | }; |
| 283 | |
| 284 | typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, /* callback type accepted by skx_register_mci() */ |
| 285 | struct res_config *cfg); |
| 286 | typedef bool (*skx_decode_f)(struct decoded_addr *res); /* decode callback type accepted by skx_set_decode() */ |
| 287 | typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); /* retry-log callback type accepted by skx_set_decode(); NOTE(review): presumably writes at most len bytes into msg -- confirm */ |
| 288 | |
| 289 | int skx_adxl_get(void); /* Declarations only: the functions below are defined outside this header. NOTE(review): int results presumably follow the kernel convention of 0 on success and a negative errno on failure -- confirm */ |
| 290 | void skx_adxl_put(void); |
| 291 | void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); /* takes the two callback types declared in this header */ |
| 292 | void skx_set_mem_cfg(bool mem_cfg_2lm); |
| 293 | void skx_set_res_cfg(struct res_config *cfg); |
| 294 | void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); /* NOTE(review): presumably records pmc -> lmc in d->mc_mapping[] (physical to logical memory controller index) -- confirm */ |
| 295 | |
| 296 | int skx_get_src_id(struct skx_dev *d, int off, u8 *id); |
| 297 | |
| 298 | int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); |
| 299 | |
| 300 | int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); |
| 301 | |
| 302 | int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, |
| 303 | struct skx_imc *imc, int chan, int dimmno, |
| 304 | struct res_config *cfg); |
| 305 | |
| 306 | int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, |
| 307 | int chan, int dimmno, const char *mod_str); |
| 308 | |
| 309 | int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, |
| 310 | const char *ctl_name, const char *mod_str, |
| 311 | get_dimm_config_f get_dimm_config, |
| 312 | struct res_config *cfg); |
| 313 | |
| 314 | int skx_mce_check_error(struct notifier_block *nb, unsigned long val, /* signature has the shape of a notifier callback (nb, val, data) */ |
| 315 | void *data); |
| 316 | |
| 317 | void skx_remove(void); |
| 318 | |
| 319 | #ifdef CONFIG_EDAC_DEBUG /* the debug hooks are only declared as real functions when CONFIG_EDAC_DEBUG is defined */ |
| 320 | void skx_setup_debug(const char *name); |
| 321 | void skx_teardown_debug(void); |
| 322 | #else /* otherwise they are empty inline stubs, so callers need no #ifdef of their own */ |
| 323 | static inline void skx_setup_debug(const char *name) {} |
| 324 | static inline void skx_teardown_debug(void) {} |
| 325 | #endif /* CONFIG_EDAC_DEBUG */ |
| 326 | |
| 327 | #endif /* _SKX_COMM_EDAC_H */ |