drivers/net/ethernet/mellanox/mlx5/core/health.c

   1 /*
   2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32
  33 #include <linux/kernel.h>
  34 #include <linux/random.h>
  35 #include <linux/vmalloc.h>
  36 #include <linux/hardirq.h>
  37 #include <linux/mlx5/driver.h>
  38 #include <linux/kern_levels.h>
  39 #include "mlx5_core.h"
  40 #include "lib/eq.h"
  41 #include "lib/mlx5.h"
  42 #include "lib/pci_vsc.h"
  43 #include "lib/tout.h"
  44 #include "diag/fw_tracer.h"
  45 #include "diag/reporter_vnic.h"
  46
  47 enum {
  48         MAX_MISSES                      = 3,
  49 };
  50
  51 enum {
  52         MLX5_HEALTH_SYNDR_FW_ERR                = 0x1,
  53         MLX5_HEALTH_SYNDR_IRISC_ERR             = 0x7,
  54         MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR  = 0x8,
  55         MLX5_HEALTH_SYNDR_CRC_ERR               = 0x9,
  56         MLX5_HEALTH_SYNDR_FETCH_PCI_ERR         = 0xa,
  57         MLX5_HEALTH_SYNDR_HW_FTL_ERR            = 0xb,
  58         MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR  = 0xc,
  59         MLX5_HEALTH_SYNDR_EQ_ERR                = 0xd,
  60         MLX5_HEALTH_SYNDR_EQ_INV                = 0xe,
  61         MLX5_HEALTH_SYNDR_FFSER_ERR             = 0xf,
  62         MLX5_HEALTH_SYNDR_HIGH_TEMP             = 0x10
  63 };
  64
  65 enum {
  66         MLX5_DROP_HEALTH_WORK,
  67 };
  68
  69 enum  {
  70         MLX5_SENSOR_NO_ERR              = 0,
  71         MLX5_SENSOR_PCI_COMM_ERR        = 1,
  72         MLX5_SENSOR_PCI_ERR             = 2,
  73         MLX5_SENSOR_NIC_DISABLED        = 3,
  74         MLX5_SENSOR_NIC_SW_RESET        = 4,
  75         MLX5_SENSOR_FW_SYND_RFR         = 5,
  76 };
  77
  78 enum {
  79         MLX5_SEVERITY_MASK              = 0x7,
  80         MLX5_SEVERITY_VALID_MASK        = 0x8,
  81 };
  82
  83 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
  84 {
  85         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
  86 }
  87
  88 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
  89 {
  90         u32 cur_cmdq_addr_l_sz;
  91
  92         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
  93         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
  94                     state << MLX5_NIC_IFC_OFFSET,
  95                     &dev->iseg->cmdq_addr_l_sz);
  96 }
  97
  98 static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
  99 {
 100         struct mlx5_core_health *health = &dev->priv.health;
 101         struct health_buffer __iomem *h = health->health;
 102
 103         /* Offline PCI reads return 0xffffffff */
 104         return (ioread32be(&h->fw_ver) == 0xffffffff);
 105 }
 106
 107 static int mlx5_health_get_rfr(u8 rfr_severity)
 108 {
 109         return rfr_severity >> MLX5_RFR_BIT_OFFSET;
 110 }
 111
 112 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
 113 {
 114         struct mlx5_core_health *health = &dev->priv.health;
 115         struct health_buffer __iomem *h = health->health;
 116         u8 synd = ioread8(&h->synd);
 117         u8 rfr;
 118
 119         rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
 120
 121         if (rfr && synd)
 122                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
 123         return rfr && synd;
 124 }
 125
 126 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
 127 {
 128         if (sensor_pci_not_working(dev))
 129                 return MLX5_SENSOR_PCI_COMM_ERR;
 130         if (pci_channel_offline(dev->pdev))
 131                 return MLX5_SENSOR_PCI_ERR;
 132         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 133                 return MLX5_SENSOR_NIC_DISABLED;
 134         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
 135                 return MLX5_SENSOR_NIC_SW_RESET;
 136         if (sensor_fw_synd_rfr(dev))
 137                 return MLX5_SENSOR_FW_SYND_RFR;
 138
 139         return MLX5_SENSOR_NO_ERR;
 140 }
 141
 142 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
 143 {
 144         enum mlx5_vsc_state state;
 145         int ret;
 146
 147         if (!mlx5_core_is_pf(dev))
 148                 return -EBUSY;
 149
 150         /* Try to lock GW access, this stage doesn't return
 151          * EBUSY because locked GW does not mean that other PF
 152          * already started the reset.
 153          */
 154         ret = mlx5_vsc_gw_lock(dev);
 155         if (ret == -EBUSY)
 156                 return -EINVAL;
 157         if (ret)
 158                 return ret;
 159
 160         state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
 161         /* At this stage, if the return status == EBUSY, then we know
 162          * for sure that another PF started the reset, so don't allow
 163          * another reset.
 164          */
 165         ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
 166         if (ret)
 167                 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
 168
 169         /* Unlock GW access */
 170         mlx5_vsc_gw_unlock(dev);
 171
 172         return ret;
 173 }
 174
 175 static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
 176 {
 177         bool supported = (ioread32be(&dev->iseg->initializing) >>
 178                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
 179         u32 fatal_error;
 180
 181         if (!supported)
 182                 return false;
 183
 184         /* The reset only needs to be issued by one PF. The health buffer is
 185          * shared between all functions, and will be cleared during a reset.
 186          * Check again to avoid a redundant 2nd reset. If the fatal errors was
 187          * PCI related a reset won't help.
 188          */
 189         fatal_error = mlx5_health_check_fatal_sensors(dev);
 190         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
 191             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
 192             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
 193                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
 194                 return false;
 195         }
 196
 197         mlx5_core_warn(dev, "Issuing FW Reset\n");
 198         /* Write the NIC interface field to initiate the reset, the command
 199          * interface address also resides here, don't overwrite it.
 200          */
 201         mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
 202
 203         return true;
 204 }
 205
 206 static void enter_error_state(struct mlx5_core_dev *dev, bool force)
 207 {
 208         if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
 209                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 210                 mlx5_cmd_flush(dev);
 211         }
 212
 213         mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
 214 }
 215
 216 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
 217 {
 218         bool err_detected = false;
 219
 220         /* Mark the device as fatal in order to abort FW commands */
 221         if ((mlx5_health_check_fatal_sensors(dev) || force) &&
 222             dev->state == MLX5_DEVICE_STATE_UP) {
 223                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 224                 err_detected = true;
 225         }
 226         mutex_lock(&dev->intf_state_mutex);
 227         if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 228                 goto unlock;/* a previous error is still being handled */
 229
 230         enter_error_state(dev, force);
 231 unlock:
 232         mutex_unlock(&dev->intf_state_mutex);
 233 }
 234
 235 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
 236 {
 237         unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
 238         int lock = -EBUSY;
 239
 240         mutex_lock(&dev->intf_state_mutex);
 241         if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
 242                 goto unlock;
 243
 244         mlx5_core_err(dev, "start\n");
 245
 246         if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
 247                 /* Get cr-dump and reset FW semaphore */
 248                 lock = lock_sem_sw_reset(dev, true);
 249
 250                 if (lock == -EBUSY) {
 251                         delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
 252                         goto recover_from_sw_reset;
 253                 }
 254                 /* Execute SW reset */
 255                 reset_fw_if_needed(dev);
 256         }
 257
 258 recover_from_sw_reset:
 259         /* Recover from SW reset */
 260         end = jiffies + msecs_to_jiffies(delay_ms);
 261         do {
 262                 if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 263                         break;
 264
 265                 msleep(20);
 266         } while (!time_after(jiffies, end));
 267
 268         if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
 269                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
 270                         mlx5_get_nic_state(dev), delay_ms);
 271         }
 272
 273         /* Release FW semaphore if you are the lock owner */
 274         if (!lock)
 275                 lock_sem_sw_reset(dev, false);
 276
 277         mlx5_core_err(dev, "end\n");
 278
 279 unlock:
 280         mutex_unlock(&dev->intf_state_mutex);
 281 }
 282
 283 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 284 {
 285         u8 nic_interface = mlx5_get_nic_state(dev);
 286
 287         switch (nic_interface) {
 288         case MLX5_NIC_IFC_FULL:
 289                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
 290                 break;
 291
 292         case MLX5_NIC_IFC_DISABLED:
 293                 mlx5_core_warn(dev, "starting teardown\n");
 294                 break;
 295
 296         case MLX5_NIC_IFC_NO_DRAM_NIC:
 297                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
 298                 break;
 299
 300         case MLX5_NIC_IFC_SW_RESET:
 301                 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
 302                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
 303                  *    and this is a VF), this is not recoverable by SW reset.
 304                  *    Logging of this is handled elsewhere.
 305                  * 2. FW reset has been issued by another function, driver can
 306                  *    be reloaded to recover after the mode switches to
 307                  *    MLX5_NIC_IFC_DISABLED.
 308                  */
 309                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
 310                         mlx5_core_warn(dev, "NIC SW reset in progress\n");
 311                 break;
 312
 313         default:
 314                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
 315                                nic_interface);
 316         }
 317
 318         mlx5_disable_device(dev);
 319 }
 320
 321 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
 322 {
 323         unsigned long end;
 324
 325         end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
 326         while (sensor_pci_not_working(dev)) {
 327                 if (time_after(jiffies, end))
 328                         return -ETIMEDOUT;
 329                 if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
 330                         mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
 331                         return -ENODEV;
 332                 }
 333                 msleep(100);
 334         }
 335         return 0;
 336 }
 337
 338 static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
 339 {
 340         mlx5_core_warn(dev, "handling bad device here\n");
 341         mlx5_handle_bad_state(dev);
 342         if (mlx5_health_wait_pci_up(dev)) {
 343                 mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
 344                 return -EIO;
 345         }
 346         mlx5_core_err(dev, "starting health recovery flow\n");
 347         if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
 348                 mlx5_core_err(dev, "health recovery failed\n");
 349                 return -EIO;
 350         }
 351
 352         mlx5_core_info(dev, "health recovery succeeded\n");
 353         return 0;
 354 }
 355
 356 static const char *hsynd_str(u8 synd)
 357 {
 358         switch (synd) {
 359         case MLX5_HEALTH_SYNDR_FW_ERR:
 360                 return "firmware internal error";
 361         case MLX5_HEALTH_SYNDR_IRISC_ERR:
 362                 return "irisc not responding";
 363         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
 364                 return "unrecoverable hardware error";
 365         case MLX5_HEALTH_SYNDR_CRC_ERR:
 366                 return "firmware CRC error";
 367         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
 368                 return "ICM fetch PCI error";
 369         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
 370                 return "HW fatal error\n";
 371         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
 372                 return "async EQ buffer overrun";
 373         case MLX5_HEALTH_SYNDR_EQ_ERR:
 374                 return "EQ error";
 375         case MLX5_HEALTH_SYNDR_EQ_INV:
 376                 return "Invalid EQ referenced";
 377         case MLX5_HEALTH_SYNDR_FFSER_ERR:
 378                 return "FFSER error";
 379         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
 380                 return "High temperature";
 381         default:
 382                 return "unrecognized error";
 383         }
 384 }
 385
 386 static const char *mlx5_loglevel_str(int level)
 387 {
 388         switch (level) {
 389         case LOGLEVEL_EMERG:
 390                 return "EMERGENCY";
 391         case LOGLEVEL_ALERT:
 392                 return "ALERT";
 393         case LOGLEVEL_CRIT:
 394                 return "CRITICAL";
 395         case LOGLEVEL_ERR:
 396                 return "ERROR";
 397         case LOGLEVEL_WARNING:
 398                 return "WARNING";
 399         case LOGLEVEL_NOTICE:
 400                 return "NOTICE";
 401         case LOGLEVEL_INFO:
 402                 return "INFO";
 403         case LOGLEVEL_DEBUG:
 404                 return "DEBUG";
 405         }
 406         return "Unknown log level";
 407 }
 408
 409 static int mlx5_health_get_severity(u8 rfr_severity)
 410 {
 411         return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
 412                rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
 413 }
 414
 415 static void print_health_info(struct mlx5_core_dev *dev)
 416 {
 417         struct mlx5_core_health *health = &dev->priv.health;
 418         struct health_buffer __iomem *h = health->health;
 419         u8 rfr_severity;
 420         int severity;
 421         int i;
 422
 423         /* If the syndrome is 0, the device is OK and no need to print buffer */
 424         if (!ioread8(&h->synd))
 425                 return;
 426
 427         if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
 428                 mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
 429                 return;
 430         }
 431
 432         rfr_severity = ioread8(&h->rfr_severity);
 433         severity  = mlx5_health_get_severity(rfr_severity);
 434         mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
 435                  hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
 436
 437         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
 438                 mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
 439                          ioread32be(h->assert_var + i));
 440
 441         mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
 442         mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
 443         mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev),
 444                  fw_rev_sub(dev));
 445         mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
 446         mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
 447         mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
 448         mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
 449         mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
 450         mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
 451                  hsynd_str(ioread8(&h->synd)));
 452         mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 453         mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
 454 }
 455
 456 static int
 457 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 458                           struct devlink_fmsg *fmsg,
 459                           struct netlink_ext_ack *extack)
 460 {
 461         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 462         struct mlx5_core_health *health = &dev->priv.health;
 463         struct health_buffer __iomem *h = health->health;
 464         u8 synd;
 465         int err;
 466
 467         synd = ioread8(&h->synd);
 468         err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
 469         if (err || !synd)
 470                 return err;
 471         return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
 472 }
 473
 474 struct mlx5_fw_reporter_ctx {
 475         u8 err_synd;
 476         int miss_counter;
 477 };
 478
 479 static int
 480 mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
 481                                struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
 482 {
 483         int err;
 484
 485         err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
 486                                        fw_reporter_ctx->err_synd);
 487         if (err)
 488                 return err;
 489         err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
 490                                         fw_reporter_ctx->miss_counter);
 491         if (err)
 492                 return err;
 493         return 0;
 494 }
 495
 496 static int
 497 mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 498                                        struct devlink_fmsg *fmsg)
 499 {
 500         struct mlx5_core_health *health = &dev->priv.health;
 501         struct health_buffer __iomem *h = health->health;
 502         u8 rfr_severity;
 503         int err;
 504         int i;
 505
 506         if (!ioread8(&h->synd))
 507                 return 0;
 508
 509         err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
 510         if (err)
 511                 return err;
 512         err = devlink_fmsg_obj_nest_start(fmsg);
 513         if (err)
 514                 return err;
 515         err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
 516         if (err)
 517                 return err;
 518
 519         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
 520                 err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
 521                 if (err)
 522                         return err;
 523         }
 524         err = devlink_fmsg_arr_pair_nest_end(fmsg);
 525         if (err)
 526                 return err;
 527         err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
 528                                         ioread32be(&h->assert_exit_ptr));
 529         if (err)
 530                 return err;
 531         err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
 532                                         ioread32be(&h->assert_callra));
 533         if (err)
 534                 return err;
 535         err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
 536         if (err)
 537                 return err;
 538         err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
 539         if (err)
 540                 return err;
 541         rfr_severity = ioread8(&h->rfr_severity);
 542         err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
 543         if (err)
 544                 return err;
 545         err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
 546         if (err)
 547                 return err;
 548         err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
 549                                        ioread8(&h->irisc_index));
 550         if (err)
 551                 return err;
 552         err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
 553         if (err)
 554                 return err;
 555         err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
 556                                         ioread16be(&h->ext_synd));
 557         if (err)
 558                 return err;
 559         err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
 560                                         ioread32be(&h->fw_ver));
 561         if (err)
 562                 return err;
 563         err = devlink_fmsg_obj_nest_end(fmsg);
 564         if (err)
 565                 return err;
 566         return devlink_fmsg_pair_nest_end(fmsg);
 567 }
 568
 569 static int
 570 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
 571                       struct devlink_fmsg *fmsg, void *priv_ctx,
 572                       struct netlink_ext_ack *extack)
 573 {
 574         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 575         int err;
 576
 577         err = mlx5_fw_tracer_trigger_core_dump_general(dev);
 578         if (err)
 579                 return err;
 580
 581         if (priv_ctx) {
 582                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
 583
 584                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
 585                 if (err)
 586                         return err;
 587         }
 588
 589         err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
 590         if (err)
 591                 return err;
 592         return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
 593 }
 594
 595 static void mlx5_fw_reporter_err_work(struct work_struct *work)
 596 {
 597         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
 598         struct mlx5_core_health *health;
 599
 600         health = container_of(work, struct mlx5_core_health, report_work);
 601
 602         if (IS_ERR_OR_NULL(health->fw_reporter))
 603                 return;
 604
 605         fw_reporter_ctx.err_synd = health->synd;
 606         fw_reporter_ctx.miss_counter = health->miss_counter;
 607         if (fw_reporter_ctx.err_synd) {
 608                 devlink_health_report(health->fw_reporter,
 609                                       "FW syndrome reported", &fw_reporter_ctx);
 610                 return;
 611         }
 612         if (fw_reporter_ctx.miss_counter)
 613                 devlink_health_report(health->fw_reporter,
 614                                       "FW miss counter reported",
 615                                       &fw_reporter_ctx);
 616 }
 617
 618 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 619                 .name = "fw",
 620                 .diagnose = mlx5_fw_reporter_diagnose,
 621                 .dump = mlx5_fw_reporter_dump,
 622 };
 623
 624 static int
 625 mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 626                                void *priv_ctx,
 627                                struct netlink_ext_ack *extack)
 628 {
 629         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 630
 631         return mlx5_health_try_recover(dev);
 632 }
 633
 634 static int
 635 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
 636                             struct devlink_fmsg *fmsg, void *priv_ctx,
 637                             struct netlink_ext_ack *extack)
 638 {
 639         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 640         u32 crdump_size = dev->priv.health.crdump_size;
 641         u32 *cr_data;
 642         int err;
 643
 644         if (!mlx5_core_is_pf(dev))
 645                 return -EPERM;
 646
 647         cr_data = kvmalloc(crdump_size, GFP_KERNEL);
 648         if (!cr_data)
 649                 return -ENOMEM;
 650         err = mlx5_crdump_collect(dev, cr_data);
 651         if (err)
 652                 goto free_data;
 653
 654         if (priv_ctx) {
 655                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
 656
 657                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
 658                 if (err)
 659                         goto free_data;
 660         }
 661
 662         err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
 663
 664 free_data:
 665         kvfree(cr_data);
 666         return err;
 667 }
 668
 669 static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
 670 {
 671         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
 672         struct mlx5_core_health *health;
 673         struct mlx5_core_dev *dev;
 674         struct devlink *devlink;
 675         struct mlx5_priv *priv;
 676
 677         health = container_of(work, struct mlx5_core_health, fatal_report_work);
 678         priv = container_of(health, struct mlx5_priv, health);
 679         dev = container_of(priv, struct mlx5_core_dev, priv);
 680         devlink = priv_to_devlink(dev);
 681
 682         mutex_lock(&dev->intf_state_mutex);
 683         if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) {
 684                 mlx5_core_err(dev, "health works are not permitted at this stage\n");
 685                 mutex_unlock(&dev->intf_state_mutex);
 686                 return;
 687         }
 688         mutex_unlock(&dev->intf_state_mutex);
 689         enter_error_state(dev, false);
 690         if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
 691                 devl_lock(devlink);
 692                 if (mlx5_health_try_recover(dev))
 693                         mlx5_core_err(dev, "health recovery failed\n");
 694                 devl_unlock(devlink);
 695                 return;
 696         }
 697         fw_reporter_ctx.err_synd = health->synd;
 698         fw_reporter_ctx.miss_counter = health->miss_counter;
 699         if (devlink_health_report(health->fw_fatal_reporter,
 700                                   "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
 701                 /* If recovery wasn't performed, due to grace period,
 702                  * unload the driver. This ensures that the driver
 703                  * closes all its resources and it is not subjected to
 704                  * requests from the kernel.
 705                  */
 706                 mlx5_core_err(dev, "Driver is in error state. Unloading\n");
 707                 mlx5_unload_one(dev, false);
 708         }
 709 }
 710
 711 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
 712                 .name = "fw_fatal",
 713                 .recover = mlx5_fw_fatal_reporter_recover,
 714                 .dump = mlx5_fw_fatal_reporter_dump,
 715 };
 716
 717 #define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
 718 #define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
 719 #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
 720 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
 721
 722 static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
 723 {
 724         struct mlx5_core_health *health = &dev->priv.health;
 725         struct devlink *devlink = priv_to_devlink(dev);
 726         u64 grace_period;
 727
 728         if (mlx5_core_is_ecpf(dev)) {
 729                 grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
 730         } else if (mlx5_core_is_pf(dev)) {
 731                 grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
 732         } else {
 733                 /* VF or SF */
 734                 grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
 735         }
 736
 737         health->fw_reporter =
 738                 devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
 739                                                0, dev);
 740         if (IS_ERR(health->fw_reporter))
 741                 mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
 742                                PTR_ERR(health->fw_reporter));
 743
 744         health->fw_fatal_reporter =
 745                 devlink_health_reporter_create(devlink,
 746                                                &mlx5_fw_fatal_reporter_ops,
 747                                                grace_period,
 748                                                dev);
 749         if (IS_ERR(health->fw_fatal_reporter))
 750                 mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
 751                                PTR_ERR(health->fw_fatal_reporter));
 752 }
 753
 754 static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
 755 {
 756         struct mlx5_core_health *health = &dev->priv.health;
 757
 758         if (!IS_ERR_OR_NULL(health->fw_reporter))
 759                 devlink_health_reporter_destroy(health->fw_reporter);
 760
 761         if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
 762                 devlink_health_reporter_destroy(health->fw_fatal_reporter);
 763 }
 764
 765 static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
 766 {
 767         unsigned long next;
 768
 769         get_random_bytes(&next, sizeof(next));
 770         next %= HZ;
 771         next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
 772
 773         return next;
 774 }
 775
 776 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
 777 {
 778         struct mlx5_core_health *health = &dev->priv.health;
 779
 780         queue_work(health->wq, &health->fatal_report_work);
 781 }
 782
 783 #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
 784 static void mlx5_health_log_ts_update(struct work_struct *work)
 785 {
 786         struct delayed_work *dwork = to_delayed_work(work);
 787         u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
 788         u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
 789         struct mlx5_core_health *health;
 790         struct mlx5_core_dev *dev;
 791         struct mlx5_priv *priv;
 792         u64 now_us;
 793
 794         health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
 795         priv = container_of(health, struct mlx5_priv, health);
 796         dev = container_of(priv, struct mlx5_core_dev, priv);
 797
 798         now_us =  ktime_to_us(ktime_get_real());
 799
 800         MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
 801         MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
 802         mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
 803
 804         queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
 805                            msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
 806 }
 807
 808 static void poll_health(struct timer_list *t)
 809 {
 810         struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
 811         struct mlx5_core_health *health = &dev->priv.health;
 812         struct health_buffer __iomem *h = health->health;
 813         u32 fatal_error;
 814         u8 prev_synd;
 815         u32 count;
 816
 817         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 818                 goto out;
 819
 820         fatal_error = mlx5_health_check_fatal_sensors(dev);
 821
 822         if (fatal_error && !health->fatal_error) {
 823                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
 824                 dev->priv.health.fatal_error = fatal_error;
 825                 print_health_info(dev);
 826                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 827                 mlx5_trigger_health_work(dev);
 828                 return;
 829         }
 830
 831         count = ioread32be(health->health_counter);
 832         if (count == health->prev)
 833                 ++health->miss_counter;
 834         else
 835                 health->miss_counter = 0;
 836
 837         health->prev = count;
 838         if (health->miss_counter == MAX_MISSES) {
 839                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
 840                 print_health_info(dev);
 841                 queue_work(health->wq, &health->report_work);
 842         }
 843
 844         prev_synd = health->synd;
 845         health->synd = ioread8(&h->synd);
 846         if (health->synd && health->synd != prev_synd)
 847                 queue_work(health->wq, &health->report_work);
 848
 849 out:
 850         mod_timer(&health->timer, get_next_poll_jiffies(dev));
 851 }
 852
 853 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 854 {
 855         u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
 856         struct mlx5_core_health *health = &dev->priv.health;
 857
 858         timer_setup(&health->timer, poll_health, 0);
 859         health->fatal_error = MLX5_SENSOR_NO_ERR;
 860         clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
 861         health->health = &dev->iseg->health;
 862         health->health_counter = &dev->iseg->health_counter;
 863
 864         health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
 865         add_timer(&health->timer);
 866 }
 867
 868 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
 869 {
 870         struct mlx5_core_health *health = &dev->priv.health;
 871
 872         if (disable_health)
 873                 set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
 874
 875         del_timer_sync(&health->timer);
 876 }
 877
 878 void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
 879 {
 880         struct mlx5_core_health *health = &dev->priv.health;
 881
 882         if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
 883                 queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
 884 }
 885
 886 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 887 {
 888         struct mlx5_core_health *health = &dev->priv.health;
 889
 890         set_bit(MLX5_DROP_HEALTH_WORK, &health->flags);
 891         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
 892         cancel_work_sync(&health->report_work);
 893         cancel_work_sync(&health->fatal_report_work);
 894 }
 895
 896 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 897 {
 898         struct mlx5_core_health *health = &dev->priv.health;
 899
 900         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
 901         destroy_workqueue(health->wq);
 902         mlx5_reporter_vnic_destroy(dev);
 903         mlx5_fw_reporters_destroy(dev);
 904 }
 905
 906 int mlx5_health_init(struct mlx5_core_dev *dev)
 907 {
 908         struct mlx5_core_health *health;
 909         char *name;
 910
 911         mlx5_fw_reporters_create(dev);
 912         mlx5_reporter_vnic_create(dev);
 913
 914         health = &dev->priv.health;
 915         name = kmalloc(64, GFP_KERNEL);
 916         if (!name)
 917                 goto out_err;
 918
 919         strcpy(name, "mlx5_health");
 920         strcat(name, dev_name(dev->device));
 921         health->wq = create_singlethread_workqueue(name);
 922         kfree(name);
 923         if (!health->wq)
 924                 goto out_err;
 925         INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
 926         INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
 927         INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
 928
 929         return 0;
 930
 931 out_err:
 932         mlx5_reporter_vnic_destroy(dev);
 933         mlx5_fw_reporters_destroy(dev);
 934         return -ENOMEM;
 935 }