Commit | Line | Data |
---|---|---|
b4e64397 DD |
1 | #ifndef DEF_RDMAVT_INCQP_H |
2 | #define DEF_RDMAVT_INCQP_H | |
3 | ||
4 | /* | |
fe314195 | 5 | * Copyright(c) 2016 Intel Corporation. |
b4e64397 DD |
6 | * |
7 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
8 | * redistributing this file, you may do so under either license. | |
9 | * | |
10 | * GPL LICENSE SUMMARY | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of version 2 of the GNU General Public License as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, but | |
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * BSD LICENSE | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
5a9cf6f2 | 51 | #include <rdma/rdma_vt.h> |
050eb7fb | 52 | #include <rdma/ib_pack.h> |
4e74080b | 53 | #include <rdma/ib_verbs.h> |
050eb7fb DD |
/*
 * Atomic bit definitions for r_aflags.
 *
 * These are bit *numbers* (0, 1, ...), not masks; r_aflags is an
 * unsigned long in struct rvt_qp.
 * NOTE(review): presumably manipulated with the atomic bitops
 * (set_bit()/test_and_clear_bit()) - confirm against the users.
 */
#define RVT_R_WRID_VALID        0
#define RVT_R_REWIND_SGE        1

/*
 * Bit definitions for r_flags.
 *
 * Unlike the r_aflags values above these are bit *masks*; r_flags is a
 * u8 in struct rvt_qp, so at most eight flags fit.
 */
#define RVT_R_REUSE_SGE 0x01
#define RVT_R_RDMAR_SEQ 0x02
#define RVT_R_RSP_NAK   0x04
#define RVT_R_RSP_SEND  0x08
#define RVT_R_COMM_EST  0x10
68 | ||
/*
 * Bit definitions for s_flags.
 *
 * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
 * RVT_S_BUSY - send tasklet is processing the QP
 * RVT_S_TIMER - the RC retry timer is active
 * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
 * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
 *                         before processing the next SWQE
 * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
 *                         before processing the next SWQE
 * RVT_S_WAIT_RNR - waiting for RNR timeout
 * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
 * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
 *                  next send completion entry not via send DMA
 * RVT_S_WAIT_PIO - waiting for a send buffer to be available
 * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
 * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
 * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
 * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
 * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
 * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
 * RVT_S_ECN - a BECN was queued to the send engine
 *
 * NOTE(review): RVT_S_RESP_PENDING, RVT_S_UNLIMITED_CREDIT,
 * RVT_S_AHG_VALID and RVT_S_AHG_CLEAR are defined below but are not
 * described in this list - add descriptions once confirmed with the
 * drivers that use them.
 *
 * s_flags is a u32 in struct rvt_qp; the highest mask currently in use
 * is 0x100000.
 */
#define RVT_S_SIGNAL_REQ_WR	0x0001
#define RVT_S_BUSY		0x0002
#define RVT_S_TIMER		0x0004
#define RVT_S_RESP_PENDING	0x0008
#define RVT_S_ACK_PENDING	0x0010
#define RVT_S_WAIT_FENCE	0x0020
#define RVT_S_WAIT_RDMAR	0x0040
#define RVT_S_WAIT_RNR		0x0080
#define RVT_S_WAIT_SSN_CREDIT	0x0100
#define RVT_S_WAIT_DMA		0x0200
#define RVT_S_WAIT_PIO		0x0400
#define RVT_S_WAIT_TX		0x0800
#define RVT_S_WAIT_DMA_DESC	0x1000
#define RVT_S_WAIT_KMEM		0x2000
#define RVT_S_WAIT_PSN		0x4000
#define RVT_S_WAIT_ACK		0x8000
#define RVT_S_SEND_ONE		0x10000
#define RVT_S_UNLIMITED_CREDIT	0x20000
#define RVT_S_AHG_VALID		0x40000
#define RVT_S_AHG_CLEAR		0x80000
#define RVT_S_ECN		0x100000
114 | ||
/*
 * Wait flags that would prevent any packet type from being sent.
 * These are the resource waits: PIO buffer, txreq, DMA descriptors and
 * kernel memory.
 */
#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
	RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)

/*
 * Wait flags that would prevent send work requests from making progress.
 * These are the protocol waits: fence, RDMA read/atomic, RNR, credits,
 * send DMA drain, PSN and ACK.
 */
#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \
	RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \
	RVT_S_WAIT_PSN | RVT_S_WAIT_ACK)

/* Union of the two masks above: every RVT_S_WAIT_* flag. */
#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
129 | ||
/*
 * Mask of the opcode bits to pay attention to when checking the QP type:
 * the three high-order bits of the 8-bit opcode.
 */
#define RVT_OPCODE_QP_MASK 0xE0
132 | ||
bfbac097 DD |
/*
 * Flags for checking QP state (see ib_rvt_state_ops[]).
 * Each value is a distinct bit so they can be OR'ed together and tested
 * with a mask.
 */
#define RVT_POST_SEND_OK                0x01
#define RVT_POST_RECV_OK                0x02
#define RVT_PROCESS_RECV_OK             0x04
#define RVT_PROCESS_SEND_OK             0x08
#define RVT_PROCESS_NEXT_SEND_OK        0x10
#define RVT_FLUSH_SEND			0x20
#define RVT_FLUSH_RECV			0x40
/* Either the send side may be processed or it is being flushed. */
#define RVT_PROCESS_OR_FLUSH_SEND \
	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
143 | ||
b4e64397 DD |
/*
 * Send work request queue entry.
 * The size of the sg_list is determined when the QP is created and stored
 * in qp->s_max_sge.
 *
 * Because of the trailing variable-length sg_list, entries in the send
 * queue are not sizeof(struct rvt_swqe) apart; use rvt_get_swqe_ptr()
 * to locate the n'th entry.
 */
struct rvt_swqe {
	/*
	 * The posted work request.  All members of the union overlay the
	 * same storage; which one is meaningful depends on the kind of
	 * work request.
	 */
	union {
		struct ib_send_wr wr;   /* don't use wr.sg_list */
		struct ib_ud_wr ud_wr;
		struct ib_reg_wr reg_wr;
		struct ib_rdma_wr rdma_wr;
		struct ib_atomic_wr atomic_wr;
	};
	u32 psn;                /* first packet sequence number */
	u32 lpsn;               /* last packet sequence number */
	u32 ssn;                /* send sequence number */
	u32 length;             /* total length of data in sg_list */
	/* Variable-length tail (qp->s_max_sge entries); must stay last. */
	struct rvt_sge sg_list[0];
};
163 | ||
/*
 * Receive work request queue entry.
 * The size of the sg_list is determined when the QP (or SRQ) is created
 * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
 *
 * Entries are variable size because of the trailing sg_list; use
 * rvt_get_rwqe_ptr() to locate the n'th entry of a queue.
 */
struct rvt_rwqe {
	u64 wr_id;
	u8 num_sge;	/* NOTE(review): presumably the number of sg_list entries in use - confirm */
	/* Variable-length tail (max_sge entries); must stay last. */
	struct ib_sge sg_list[0];
};
174 | ||
/*
 * This structure is used to contain the head pointer, tail pointer,
 * and receive work queue entries as a single memory allocation so
 * it can be mmap'ed into user space.
 * Note that the wq array elements are variable size so you can't
 * just index into the array to get the N'th element;
 * use rvt_get_rwqe_ptr() instead.
 */
struct rvt_rwq {
	u32 head;               /* new work requests posted to the head */
	u32 tail;               /* receives pull requests from here. */
	/* Start of the variable-size entries; must stay last. */
	struct rvt_rwqe wq[0];
};
188 | ||
/*
 * Receive queue descriptor: points at the head/tail/entries allocation
 * (struct rvt_rwq) and records its geometry.  Embedded in struct rvt_qp
 * (r_rq) and struct rvt_srq (rq).
 */
struct rvt_rq {
	struct rvt_rwq *wq;
	u32 size;               /* size of RWQE array */
	u8 max_sge;		/* sg_list entries per RWQE; used by rvt_get_rwqe_ptr() */
	/* protect changes in this struct */
	spinlock_t lock ____cacheline_aligned_in_smp;
};
196 | ||
/*
 * This structure is used by rvt_mmap() to validate an offset
 * when an mmap() request is made.  The vm_area_struct then uses
 * this as its vm_private_data.
 */
struct rvt_mmap_info {
	struct list_head pending_mmaps;	/* list linkage; NOTE(review): presumably the list of not-yet-mapped regions - confirm */
	struct ib_ucontext *context;
	void *obj;
	__u64 offset;			/* the offset an mmap() request is validated against */
	struct kref ref;		/* reference count for this structure */
	unsigned size;
};
210 | ||
/*
 * Used to size s_ack_queue[] in struct rvt_qp, which holds
 * RVT_MAX_RDMA_ATOMIC + 1 entries.
 */
#define RVT_MAX_RDMA_ATOMIC 16

/*
 * This structure holds the information that the send tasklet needs
 * to send a RDMA read response or atomic operation.
 */
struct rvt_ack_entry {
	u8 opcode;
	u8 sent;
	u32 psn;
	u32 lpsn;
	/*
	 * Payload of the response; the two members share storage.
	 * NOTE(review): presumably selected by opcode (rdma_sge for RDMA
	 * read, atomic_data for atomics) - confirm against the users.
	 */
	union {
		struct rvt_sge rdma_sge;
		u64 atomic_data;
	};
};
227 | ||
bfee5e32 VM |
/*
 * NOTE(review): not referenced anywhere in this header and not
 * documented; meaning and units must be taken from its users.
 */
#define RC_QP_SCALING_INTERVAL	5
229 | ||
b4e64397 DD |
/*
 * Variables prefixed with s_ are for the requester (sender).
 * Variables prefixed with r_ are for the responder (receiver).
 * Variables prefixed with ack_ are for responder replies.
 *
 * Common variables are protected by both r_rq.lock and s_lock in that order
 * which only happens in modify_qp() or changing the QP 'state'.
 *
 * The structure is laid out in sections: read-mostly configuration
 * first, then read/write state, with several members explicitly
 * cacheline aligned.  Keep members in their section; r_sg_list must
 * remain the last member.
 */
struct rvt_qp {
	struct ib_qp ibqp;
	void *priv; /* Driver private data */
	/* read mostly fields above and below */
	struct ib_ah_attr remote_ah_attr;
	struct ib_ah_attr alt_ah_attr;
	struct rvt_qp __rcu *next;           /* link list for QPN hash table */
	struct rvt_swqe *s_wq;  /* send work queue */
	struct rvt_mmap_info *ip;

	unsigned long timeout_jiffies;  /* computed from timeout */

	enum ib_mtu path_mtu;
	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
	u32 remote_qpn;
	u32 pmtu;		/* decoded from path_mtu */
	u32 qkey;               /* QKEY for this QP (for UD or RD) */
	u32 s_size;             /* send work queue size */
	u32 s_ahgpsn;           /* set to the psn in the copy of the header */

	u8 state;               /* QP state */
	u8 allowed_ops;		/* high order bits of allowed opcodes */
	u8 qp_access_flags;
	u8 alt_timeout;         /* Alternate path timeout for this QP */
	u8 timeout;             /* Timeout for this QP */
	u8 s_srate;
	u8 s_mig_state;
	u8 port_num;
	u8 s_pkey_index;        /* PKEY index to use */
	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
	u8 s_retry_cnt;         /* number of times to retry */
	u8 s_rnr_retry_cnt;
	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
	u8 s_max_sge;           /* size of s_wq->sg_list */
	u8 s_draining;

	/* start of read/write fields */
	atomic_t refcount ____cacheline_aligned_in_smp;
	wait_queue_head_t wait;

	/* indexed by r_head_ack_queue and s_tail_ack_queue (below) */
	struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1]
		____cacheline_aligned_in_smp;
	struct rvt_sge_state s_rdma_read_sge;

	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
	unsigned long r_aflags;	/* RVT_R_WRID_VALID / RVT_R_REWIND_SGE bit numbers */
	u64 r_wr_id;            /* ID for current receive WQE */
	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
	u32 r_len;              /* total length of r_sge */
	u32 r_rcv_len;          /* receive data len processed */
	u32 r_psn;              /* expected rcv packet sequence number */
	u32 r_msn;              /* message sequence number */

	u8 r_state;             /* opcode of last packet received */
	u8 r_flags;		/* RVT_R_REUSE_SGE ... RVT_R_COMM_EST bit masks */
	u8 r_head_ack_queue;    /* index into s_ack_queue[] */

	struct list_head rspwait;       /* link for waiting to respond */

	struct rvt_sge_state r_sge;     /* current receive data */
	struct rvt_rq r_rq;             /* receive work queue */

	spinlock_t s_lock ____cacheline_aligned_in_smp;
	struct rvt_sge_state *s_cur_sge;
	u32 s_flags;		/* RVT_S_* bit masks */
	struct rvt_swqe *s_wqe;
	struct rvt_sge_state s_sge;     /* current send request data */
	struct rvt_mregion *s_rdma_mr;
	u32 s_cur_size;         /* size of send packet in bytes */
	u32 s_len;              /* total length of s_sge */
	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
	u32 s_next_psn;         /* PSN for next request */
	u32 s_last_psn;         /* last response PSN processed */
	u32 s_sending_psn;      /* lowest PSN that is being sent */
	u32 s_sending_hpsn;     /* highest PSN that is being sent */
	u32 s_psn;              /* current packet sequence number */
	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
	u32 s_head;             /* new entries added here */
	u32 s_tail;             /* next entry to process */
	u32 s_cur;              /* current work queue entry */
	u32 s_acked;            /* last un-ACK'ed entry */
	u32 s_last;             /* last completed entry */
	u32 s_ssn;              /* SSN of tail entry */
	u32 s_lsn;              /* limit sequence number (credit) */
	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
	u16 s_rdma_ack_cnt;
	s8 s_ahgidx;
	u8 s_state;             /* opcode of last packet sent */
	u8 s_ack_state;         /* opcode of packet to ACK */
	u8 s_nak_state;         /* non-zero if NAK is pending */
	u8 r_nak_state;         /* non-zero if NAK is pending */
	u8 s_retry;             /* requester retry counter */
	u8 s_rnr_retry;         /* requester RNR retry counter */
	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */

	struct rvt_sge_state s_ack_rdma_sge;
	struct timer_list s_timer;

	/*
	 * This sge list MUST be last. Do not add anything below here.
	 */
	struct rvt_sge r_sg_list[0] /* verified SGEs */
		____cacheline_aligned_in_smp;
};
346 | ||
/*
 * Shared receive queue: the ib_srq handle, the receive queue itself and
 * its mmap bookkeeping.
 */
struct rvt_srq {
	struct ib_srq ibsrq;
	struct rvt_rq rq;		/* receive work queue; rq.max_sge sizes each RWQE */
	struct rvt_mmap_info *ip;
	/* send signal when number of RWQEs < limit */
	u32 limit;
};
354 | ||
0acb0cc7 DD |
/*
 * QP number space and QPN bitmap geometry.
 *
 * The bitmap is divided into PAGE_SIZE pages of RVT_BITS_PER_PAGE bits
 * each, so RVT_QPNMAP_ENTRIES pages provide one bit for each of the
 * RVT_QPN_MAX (2^24) QP numbers.  RVT_QPN_MASK selects the low 24 bits.
 */
#define RVT_QPN_MAX                 BIT(24)
#define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
#define RVT_QPN_MASK		    0xFFFFFF
0acb0cc7 DD |
360 | |
/*
 * QPN-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way,
 * large bitmaps are not allocated unless large numbers of QPs are used.
 */
struct rvt_qpn_map {
	void *page;	/* one bitmap page, or NULL if not yet allocated */
};
369 | ||
/*
 * QP number allocator state: a lock, allocation cursor and the array of
 * lazily allocated bitmap pages.
 */
struct rvt_qpn_table {
	spinlock_t lock; /* protect changes to the qp table */
	unsigned flags;         /* flags for QP0/1 allocated for each port */
	u32 last;               /* last QP number allocated */
	u32 nmaps;              /* size of the map table */
	u16 limit;
	u8  incr;
	/* bit map of free QP numbers other than 0/1 */
	struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES];
};
380 | ||
/*
 * Per-device QP bookkeeping: the RCU-protected QP table (QPs are
 * chained through rvt_qp.next) and the QP number allocator.
 */
struct rvt_qp_ibdev {
	u32 qp_table_size;
	u32 qp_table_bits;	/* NOTE(review): presumably log2(qp_table_size) - confirm */
	struct rvt_qp __rcu **qp_table;
	spinlock_t qpt_lock; /* qptable lock */
	struct rvt_qpn_table qpn_table;
};
388 | ||
4e74080b DD |
/*
 * There is one struct rvt_mcast for each multicast GID.
 * All attached QPs are then stored as a list of
 * struct rvt_mcast_qp.
 */
struct rvt_mcast_qp {
	struct list_head list;	/* list linkage (see struct rvt_mcast.qp_list) */
	struct rvt_qp *qp;	/* the attached QP */
};
398 | ||
/*
 * One multicast group, identified by its GID, together with the list of
 * QPs attached to it.
 */
struct rvt_mcast {
	struct rb_node rb_node;		/* red-black tree linkage */
	union ib_gid mgid;		/* the multicast GID of this group */
	struct list_head qp_list;	/* attached QPs, as struct rvt_mcast_qp */
	wait_queue_head_t wait;
	atomic_t refcount;
	int n_attached;			/* NOTE(review): presumably the length of qp_list - confirm */
};
407 | ||
bfbac097 DD |
408 | /* |
409 | * Since struct rvt_swqe is not a fixed size, we can't simply index into | |
4e74080b | 410 | * struct rvt_qp.s_wq. This function does the array index computation. |
bfbac097 DD |
411 | */ |
412 | static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, | |
413 | unsigned n) | |
414 | { | |
415 | return (struct rvt_swqe *)((char *)qp->s_wq + | |
416 | (sizeof(struct rvt_swqe) + | |
417 | qp->s_max_sge * | |
418 | sizeof(struct rvt_sge)) * n); | |
419 | } | |
420 | ||
3b0b3fb3 DD |
421 | /* |
422 | * Since struct rvt_rwqe is not a fixed size, we can't simply index into | |
423 | * struct rvt_rwq.wq. This function does the array index computation. | |
424 | */ | |
425 | static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) | |
426 | { | |
427 | return (struct rvt_rwqe *) | |
428 | ((char *)rq->wq->wq + | |
429 | (sizeof(struct rvt_rwqe) + | |
430 | rq->max_sge * sizeof(struct ib_sge)) * n); | |
431 | } | |
432 | ||
bfbac097 DD |
/*
 * Table of RVT_POST_*_OK / RVT_PROCESS_*_OK / RVT_FLUSH_* flags, defined
 * outside this header.
 * NOTE(review): presumably indexed by QP state - confirm at the definition.
 */
extern const int ib_rvt_state_ops[];
434 | ||
3b0b3fb3 DD |
/* Forward declaration: only pointers to it are used in this header. */
struct rvt_dev_info;

/*
 * QP helpers implemented outside this header.  Their behavior is not
 * visible from here; see the definitions for locking and calling
 * context requirements.
 */
void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends);
int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn);
void rvt_dec_qp_cnt(struct rvt_dev_info *rdi);
441 | ||
b4e64397 | 442 | #endif /* DEF_RDMAVT_INCQP_H */ |