From d7bb58fb1c0e7264a7261c7d0304121ef9402e94 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 1 Aug 2007 12:28:53 +0300 Subject: [PATCH] mlx4_core: Write MTTs from CPU instead with of WRITE_MTT FW command Write MTT entries directly to ICM from the driver (eliminating use of WRITE_MTT command). This reduces the number of FW commands needed to register an MR by at least a factor of 2 and speeds up memory registration significantly. This code will also be used to implement FMRs. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mr.c | 7 ++-- drivers/net/mlx4/icm.c | 21 +++++++--- drivers/net/mlx4/icm.h | 2 +- drivers/net/mlx4/main.c | 11 +++++ drivers/net/mlx4/mr.c | 73 ++++++++++++++++----------------- 5 files changed, 67 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 85ae906f1d12..734ec2bd15cd 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, pages[i++] = sg_dma_address(&chunk->page_list[j]) + umem->page_size * k; /* - * Be friendly to WRITE_MTT firmware - * command, and pass it chunks of - * appropriate size. + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. */ - if (i == PAGE_SIZE / sizeof (u64) - 2) { + if (i == PAGE_SIZE / sizeof (u64)) { err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); if (err) diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c index 250e24887578..4b3c109d5eae 100644 --- a/drivers/net/mlx4/icm.c +++ b/drivers/net/mlx4/icm.c @@ -301,9 +301,9 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) mutex_unlock(&table->mutex); } -void *mlx4_table_find(struct mlx4_icm_table *table, int obj) +void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle) { - int idx, offset, i; + int idx, offset, dma_offset, i; struct mlx4_icm_chunk *chunk; struct mlx4_icm *icm; struct page *page = NULL; @@ -313,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj) mutex_lock(&table->mutex); - idx = obj & (table->num_obj - 1); - icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)]; - offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size); + idx = (obj & (table->num_obj - 1)) * table->obj_size; + icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE; if (!icm) goto out; list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > dma_offset) + *dma_handle = sg_dma_address(&chunk->mem[i]) + + dma_offset; + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + /* + * DMA mapping can merge pages but not split them, + * so if we found the page, dma_handle has already + * been assigned to. + */ if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; diff --git a/drivers/net/mlx4/icm.h b/drivers/net/mlx4/icm.h index a77db6de8597..6c44edf35847 100644 --- a/drivers/net/mlx4/icm.h +++ b/drivers/net/mlx4/icm.h @@ -83,7 +83,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); -void *mlx4_table_find(struct mlx4_icm_table *table, int obj); +void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, int start, int end); void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index ed7e8d76cde0..478b3ba74edc 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -300,6 +300,17 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev, goto err_unmap_cmpt; } + /* + * Reserved MTT entries must be aligned up to a cacheline + * boundary, since the FW will write to them, while the driver + * writes to all other MTT entries. (The variable + * dev->caps.mtt_entry_sz below is really the MTT segment + * size, not the raw entry size) + */ + dev->caps.reserved_mtts = + ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, + dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; + err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, init_hca->mtt_base, dev->caps.mtt_entry_sz, diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 60a6ee27cfd4..3cc98c699aaf 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -349,58 +349,57 @@ err_table: } EXPORT_SYMBOL_GPL(mlx4_mr_enable); -static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, - int num_mtt) +static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list) { - return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT, - MLX4_CMD_TIME_CLASS_B); + struct mlx4_priv *priv = mlx4_priv(dev); + __be64 *mtts; + dma_addr_t dma_handle; + int i; + int s = start_index * sizeof (u64); + + /* All MTTs must fit in the same page */ + if (start_index / (PAGE_SIZE / sizeof (u64)) != + (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64))) + return -EINVAL; + + if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1)) + return -EINVAL; + + mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg + + s / dev->caps.mtt_entry_sz, &dma_handle); + if (!mtts) + return -ENOMEM; + + for (i = 0; i < npages; ++i) + mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); + + dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); + + return 0; } int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list) { - struct mlx4_cmd_mailbox *mailbox; - __be64 *mtt_entry; - int i; - int err = 0; + int chunk; + int err; if (mtt->order < 0) return -EINVAL; - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - mtt_entry = mailbox->buf; - while (npages > 0) { - mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8); - mtt_entry[1] = 0; - - for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i) - mtt_entry[i + 2] = cpu_to_be64(page_list[i] | - MLX4_MTT_FLAG_PRESENT); - - /* - * If we have an odd number of entries to write, add - * one more dummy entry for firmware efficiency. - */ - if (i & 1) - mtt_entry[i + 2] = 0; - - err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1); + chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages); + err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list); if (err) - goto out; + return err; - npages -= i; - start_index += i; - page_list += i; + npages -= chunk; + start_index += chunk; + page_list += chunk; } -out: - mlx4_free_cmd_mailbox(dev, mailbox); - - return err; + return 0; } EXPORT_SYMBOL_GPL(mlx4_write_mtt); -- 2.25.1