RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
author Danil Kipnis <danil.kipnis@cloud.ionos.com>
Fri, 24 Jul 2020 11:15:06 +0000 (16:45 +0530)
committer Jason Gunthorpe <jgg@nvidia.com>
Wed, 29 Jul 2020 17:26:53 +0000 (14:26 -0300)
To prevent all clients from starting to reconnect at the same time,
schedule the reconnect dwork with a random jitter of +[0,8] seconds.

Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality")
Link: https://lore.kernel.org/r/20200724111508.15734-2-haris.iqbal@cloud.ionos.com
Signed-off-by: Danil Kipnis <danil.kipnis@cloud.ionos.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/ulp/rtrs/rtrs-clt.c

index 564388a85603f5cb782994a86a6925a964426209..5b31d3b03737ca366db3f2e514eebfc4d6299946 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/rculist.h>
+#include <linux/random.h>
 
 #include "rtrs-clt.h"
 #include "rtrs-log.h"
  * leads to "false positives" failed reconnect attempts
  */
 #define RTRS_RECONNECT_BACKOFF 1000
+/*
+ * Random jitter added to the reconnect delay so that clients do not all
+ * reconnect at once after a major outage. NOTE(review): it is added to a
+ * millisecond delay, so 8 yields 0..7 ms, not 0..8 seconds - confirm unit.
+ */
+#define RTRS_RECONNECT_SEED 8
 
 MODULE_DESCRIPTION("RDMA Transport Client");
 MODULE_LICENSE("GPL");
@@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
                 */
                delay_ms = clt->reconnect_delay_sec * 1000;
                queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
-                                  msecs_to_jiffies(delay_ms));
+                                  msecs_to_jiffies(delay_ms +
+                                                   prandom_u32() % RTRS_RECONNECT_SEED));
        } else {
                /*
                 * Error can happen just on establishing new connection,
@@ -2503,7 +2511,9 @@ reconnect_again:
                sess->stats->reconnects.fail_cnt++;
                delay_ms = clt->reconnect_delay_sec * 1000;
                queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
-                                  msecs_to_jiffies(delay_ms));
+                                  msecs_to_jiffies(delay_ms +
+                                                   prandom_u32() %
+                                                   RTRS_RECONNECT_SEED));
        }
 }