net/mlx5e: Improve MTT/KSM alignment
author Maxim Mikityanskiy <maximmi@nvidia.com>
Sun, 2 Oct 2022 04:56:23 +0000 (21:56 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 3 Oct 2022 23:55:28 +0000 (16:55 -0700)
Make mlx5e_mpwrq_mtts_per_wqe() take into account that KSM requires a
smaller alignment than MTT.
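
For illustration, a minimal userspace sketch of the new alignment
arithmetic (per the mlx5 definitions, a WQE basic block is 64 bytes, an
MTT entry 8 bytes, a KSM entry 16 bytes; the names here are stand-ins,
not driver code):

    #include <stdio.h>

    #define WQE_BB          64 /* MLX5_SEND_WQE_BB */
    #define MTT_ENTRY_SIZE   8 /* sizeof(struct mlx5_mtt) */
    #define KSM_ENTRY_SIZE  16 /* sizeof(struct mlx5_ksm) */

    /* Round up x to a multiple of the power-of-two a. */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned int pages_per_wqe = 3; /* example value */

            /* MTT aligns to 64 / 8 = 8 entries, KSM only to 64 / 16 = 4. */
            printf("MTT: %u\n", ALIGN(pages_per_wqe + 1, WQE_BB / MTT_ENTRY_SIZE));
            printf("KSM: %u\n", ALIGN(pages_per_wqe + 1, WQE_BB / KSM_ENTRY_SIZE));
            return 0;
    }

For 3 pages plus the one extra entry for oversize packets, this prints
"MTT: 8" and "KSM: 4": the KSM case no longer pads up to 8.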

Ensure that there is always an even number of MTTs in a UMR WQE, so that
complete octwords are formed and no garbage is mapped.
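
A sketch of the even-count guard (it mirrors the WARN_ON_ONCE bump added
to mlx5e_mpwrq_pages_per_wqe below; constants and names are
illustrative):

    #include <stdio.h>

    #define OCTWORD         16 /* MLX5_OCTWORD */
    #define MTT_ENTRY_SIZE   8 /* sizeof(struct mlx5_mtt) */

    int main(void)
    {
            unsigned int pages_per_wqe = 1;

            /* Two 8-byte MTTs fill one 16-byte octword; a single-MTT WQE
             * would leave half of its only octword mapping garbage, so
             * bump it to two.
             */
            if (pages_per_wqe * MTT_ENTRY_SIZE % OCTWORD)
                    pages_per_wqe = OCTWORD / MTT_ENTRY_SIZE;

            printf("pages_per_wqe: %u\n", pages_per_wqe); /* prints 2 */
            return 0;
    }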

Drop the extra alignment in MLX5_MTT_OCTW that may cause
ucseg->xlt_octowords to be set too big, also leading to mapping garbage.
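
The over-alignment is easy to see by putting the old macro body next to
the exact conversion (a constructed example, not driver code):

    #include <stdio.h>

    #define ALIGN(x, a)        (((x) + (a) - 1) & ~((a) - 1))
    /* Old: round the MTT count up to 8, then halve into octwords. */
    #define OLD_MTT_OCTW(mtts) (ALIGN((mtts), 8) / 2)
    /* New: exact, two 8-byte MTTs per 16-byte octword. */
    #define NEW_MTT_OCTW(mtts) ((mtts) * 8 / 16)

    int main(void)
    {
            /* 10 MTTs: old reports 8 octwords (16 MTT slots, 6 of them
             * garbage), new reports exactly 5.
             */
            printf("old: %u new: %u\n", OLD_MTT_OCTW(10u), NEW_MTT_OCTW(10u));
            return 0;
    }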

Generalize some calculations by introducing the MLX5_OCTWORD constant.
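
A sketch of the generalized conversion (the enum and helpers are
stand-ins for the driver's mlx5e_mpwrq_umr_mode and
mlx5e_mpwrq_umr_entry_size):

    #include <assert.h>

    #define OCTWORD 16 /* MLX5_OCTWORD */

    enum umr_mode { UMR_MODE_ALIGNED, UMR_MODE_UNALIGNED };

    /* 8-byte MTT entries in aligned mode, 16-byte KSM entries otherwise. */
    static unsigned int umr_entry_size(enum umr_mode mode)
    {
            return mode == UMR_MODE_ALIGNED ? 8 : 16;
    }

    /* One formula replaces the per-mode switch when converting a UMR
     * entry count to octwords; the count must fill whole octwords.
     */
    static unsigned int umr_octowords(unsigned int entries, enum umr_mode mode)
    {
            assert(entries * umr_entry_size(mode) % OCTWORD == 0);
            return entries * umr_entry_size(mode) / OCTWORD;
    }

    int main(void)
    {
            return umr_octowords(4, UMR_MODE_ALIGNED) == 2 ? 0 : 1;
    }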

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index a2d09f30acd1bb4e4af98c47c4bafd6ca3aed188..93607db1dea45f9bd8b652d319e01cbce0d3e364 100644 (file)
@@ -109,12 +109,8 @@ struct page_pool;
 #define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
        rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
 
-#define MLX5_ALIGN_MTTS(mtts)          (ALIGN(mtts, 8))
-#define MLX5_ALIGNED_MTTS_OCTW(mtts)   ((mtts) / 2)
-#define MLX5_MTT_OCTW(mtts)            (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
-#define MLX5_KSM_OCTW(ksms)             (ksms)
 #define MLX5E_MAX_RQ_NUM_MTTS  \
-       (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+       (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */
 #define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
 #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
 
index b57855bf76295eccc4f38a5617eba414b5c58604..e8c3b8abf941372b06dbc7dda4baedaabd695300 100644 (file)
@@ -91,6 +91,13 @@ u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
 
        pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
 
+       /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+        * in octwords in a UMR WQE, so we need at least two to avoid mapping
+        * garbage addresses.
+        */
+       if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+               pages_per_wqe = 2;
+
        /* Sanity check for further calculations to succeed. */
        BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
        if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
@@ -131,7 +138,8 @@ u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
         * MTU. These oversize packets are dropped by the driver at a later
         * stage.
         */
-       return MLX5_ALIGN_MTTS(pages_per_wqe + 1);
+       return ALIGN(pages_per_wqe + 1,
+                    MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
 }
 
 u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
index e12a856331b896299ef98665b8c93eb74c4887ab..4b2df28955050536660c10841212a27236e39f33 100644 (file)
@@ -66,9 +66,10 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
 
+       /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
        offset = ix * rq->mpwqe.mtts_per_wqe;
        if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
-               offset = MLX5_ALIGNED_MTTS_OCTW(offset);
+               offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
        umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
 
        icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
index b5a416ff1603d3fe99bfb22c5289c6467d8e989c..2093b6cc6c7cf8d89a95a23afaee1ef1f235d496 100644 (file)
@@ -205,14 +205,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
 
 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
 {
-       switch (umr_mode) {
-       case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
-               return MLX5_MTT_OCTW(entries);
-       case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
-               return MLX5_KSM_OCTW(entries);
-       }
-       WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
-       return 0;
+       u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+
+       WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
+
+       return entries * umr_entry_size / MLX5_OCTWORD;
 }
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
index b61604d8770193dd52c46391ca026e2f0780be87..58084650151f836102b0b09518a0525016aed3b7 100644 (file)
@@ -682,7 +682,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                            MLX5_OPCODE_UMR);
 
-       offset = MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe);
+       offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
        umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
 
        sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {