1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Intel MIC Platform Software Stack (MPSS)
4 *
5 * Copyright(c) 2014 Intel Corporation.
6 *
7 * Intel SCIF driver.
8 */
9#include <linux/scif.h>
10#include "scif_main.h"
11#include "scif_map.h"
12
13static const char * const scif_ep_states[] = {
14 "Unbound",
15 "Bound",
16 "Listening",
17 "Connected",
18 "Connecting",
19 "Mapping",
20 "Closing",
21 "Close Listening",
22 "Disconnected",
23 "Zombie"};
24
25enum conn_async_state {
26 ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
27 ASYNC_CONN_INPROGRESS, /* async connect in progress */
28 ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
29};
30
31/*
32 * File operations for anonymous inode file associated with a SCIF endpoint,
33 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
34 * poll API in the kernel and these take in a struct file *. Since a struct
35 * file is not available to kernel mode SCIF, it uses an anonymous file for
36 * this purpose.
37 */
38const struct file_operations scif_anon_fops = {
39 .owner = THIS_MODULE,
40};
41
42scif_epd_t scif_open(void)
43{
44 struct scif_endpt *ep;
45 int err;
46
47 might_sleep();
48 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
49 if (!ep)
50 goto err_ep_alloc;
51
52 ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
53 if (!ep->qp_info.qp)
54 goto err_qp_alloc;
55
56 err = scif_anon_inode_getfile(ep);
57 if (err)
58 goto err_anon_inode;
59
60 spin_lock_init(&ep->lock);
61 mutex_init(&ep->sendlock);
62 mutex_init(&ep->recvlock);
63
64 scif_rma_ep_init(ep);
65 ep->state = SCIFEP_UNBOUND;
66 dev_dbg(scif_info.mdev.this_device,
67 "SCIFAPI open: ep %p success\n", ep);
68 return ep;
69
70err_anon_inode:
71 kfree(ep->qp_info.qp);
72err_qp_alloc:
73 kfree(ep);
74err_ep_alloc:
75 return NULL;
76}
77EXPORT_SYMBOL_GPL(scif_open);
78
79/*
80 * scif_disconnect_ep - Disconnects the endpoint if found
81 * @epd: The end point returned from scif_open()
82 */
83static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
84{
85 struct scifmsg msg;
86 struct scif_endpt *fep = NULL;
87 struct scif_endpt *tmpep;
88 struct list_head *pos, *tmpq;
89 int err;
90
91 /*
92 * Wake up any threads blocked in send()/recv() before closing
93 * out the connection. Grabbing and releasing the send/recv lock
94 * will ensure that any blocked senders/receivers have exited for
95 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
96 * close. Ring 3 endpoints are not affected since close will not
97 * be called while there are IOCTLs executing.
98 */
99 wake_up_interruptible(&ep->sendwq);
100 wake_up_interruptible(&ep->recvwq);
101 mutex_lock(&ep->sendlock);
102 mutex_unlock(&ep->sendlock);
103 mutex_lock(&ep->recvlock);
104 mutex_unlock(&ep->recvlock);
105
106 /* Remove from the connected list */
107 mutex_lock(&scif_info.connlock);
108 list_for_each_safe(pos, tmpq, &scif_info.connected) {
109 tmpep = list_entry(pos, struct scif_endpt, list);
110 if (tmpep == ep) {
111 list_del(pos);
112 fep = tmpep;
113 spin_lock(&ep->lock);
114 break;
115 }
116 }
117
118 if (!fep) {
119 /*
120 * The other side has completed the disconnect before
121 * the end point can be removed from the list. Therefore
122 * the ep lock is not locked, traverse the disconnected
123 * list to find the endpoint and release the conn lock.
124 */
125 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
126 tmpep = list_entry(pos, struct scif_endpt, list);
127 if (tmpep == ep) {
128 list_del(pos);
129 break;
130 }
131 }
132 mutex_unlock(&scif_info.connlock);
133 return NULL;
134 }
135
136 init_completion(&ep->discon);
137 msg.uop = SCIF_DISCNCT;
138 msg.src = ep->port;
139 msg.dst = ep->peer;
140 msg.payload[0] = (u64)ep;
141 msg.payload[1] = ep->remote_ep;
142
143 err = scif_nodeqp_send(ep->remote_dev, &msg);
144 spin_unlock(&ep->lock);
145 mutex_unlock(&scif_info.connlock);
146
147 if (!err)
148 /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
149 wait_for_completion_timeout(&ep->discon,
150 SCIF_NODE_ALIVE_TIMEOUT);
151 return ep;
152}
153
154int scif_close(scif_epd_t epd)
155{
156 struct scif_endpt *ep = (struct scif_endpt *)epd;
157 struct scif_endpt *tmpep;
158 struct list_head *pos, *tmpq;
159 enum scif_epd_state oldstate;
160 bool flush_conn;
161
162 dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
163 ep, scif_ep_states[ep->state]);
164 might_sleep();
165 spin_lock(&ep->lock);
166 flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
167 spin_unlock(&ep->lock);
168
169 if (flush_conn)
170 flush_work(&scif_info.conn_work);
171
172 spin_lock(&ep->lock);
173 oldstate = ep->state;
174
175 ep->state = SCIFEP_CLOSING;
176
177 switch (oldstate) {
178 case SCIFEP_ZOMBIE:
179 dev_err(scif_info.mdev.this_device,
180 "SCIFAPI close: zombie state unexpected\n");
181 /* fall through */
182 case SCIFEP_DISCONNECTED:
183 spin_unlock(&ep->lock);
184 scif_unregister_all_windows(epd);
185 /* Remove from the disconnected list */
186 mutex_lock(&scif_info.connlock);
187 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
188 tmpep = list_entry(pos, struct scif_endpt, list);
189 if (tmpep == ep) {
190 list_del(pos);
191 break;
192 }
193 }
194 mutex_unlock(&scif_info.connlock);
195 break;
196 case SCIFEP_UNBOUND:
197 case SCIFEP_BOUND:
198 case SCIFEP_CONNECTING:
199 spin_unlock(&ep->lock);
200 break;
201 case SCIFEP_MAPPING:
202 case SCIFEP_CONNECTED:
203 case SCIFEP_CLOSING:
204 {
205 spin_unlock(&ep->lock);
206 scif_unregister_all_windows(epd);
207 scif_disconnect_ep(ep);
208 break;
209 }
210 case SCIFEP_LISTENING:
211 case SCIFEP_CLLISTEN:
212 {
213 struct scif_conreq *conreq;
214 struct scifmsg msg;
215 struct scif_endpt *aep;
216
217 spin_unlock(&ep->lock);
218 mutex_lock(&scif_info.eplock);
219
220 /* remove from listen list */
221 list_for_each_safe(pos, tmpq, &scif_info.listen) {
222 tmpep = list_entry(pos, struct scif_endpt, list);
223 if (tmpep == ep)
224 list_del(pos);
225 }
226 /* Remove any dangling accepts */
227 while (ep->acceptcnt) {
228 aep = list_first_entry(&ep->li_accept,
229 struct scif_endpt, liacceptlist);
230 list_del(&aep->liacceptlist);
231 scif_put_port(aep->port.port);
232 list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
233 tmpep = list_entry(pos, struct scif_endpt,
234 miacceptlist);
235 if (tmpep == aep) {
236 list_del(pos);
237 break;
238 }
239 }
240 mutex_unlock(&scif_info.eplock);
241 mutex_lock(&scif_info.connlock);
242 list_for_each_safe(pos, tmpq, &scif_info.connected) {
243 tmpep = list_entry(pos,
244 struct scif_endpt, list);
245 if (tmpep == aep) {
246 list_del(pos);
247 break;
248 }
249 }
250 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
251 tmpep = list_entry(pos,
252 struct scif_endpt, list);
253 if (tmpep == aep) {
254 list_del(pos);
255 break;
256 }
257 }
258 mutex_unlock(&scif_info.connlock);
259 scif_teardown_ep(aep);
260 mutex_lock(&scif_info.eplock);
261 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
262 ep->acceptcnt--;
263 }
264
265 spin_lock(&ep->lock);
266 mutex_unlock(&scif_info.eplock);
267
268 /* Remove and reject any pending connection requests. */
269 while (ep->conreqcnt) {
270 conreq = list_first_entry(&ep->conlist,
271 struct scif_conreq, list);
272 list_del(&conreq->list);
273
274 msg.uop = SCIF_CNCT_REJ;
275 msg.dst.node = conreq->msg.src.node;
276 msg.dst.port = conreq->msg.src.port;
277 msg.payload[0] = conreq->msg.payload[0];
278 msg.payload[1] = conreq->msg.payload[1];
279 /*
280 * No error handling on purpose for scif_nodeqp_send().
281 * If the remote node is lost we still want to free the
282 * connection requests on the self node.
283 */
284 scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
285 &msg);
286 ep->conreqcnt--;
287 kfree(conreq);
288 }
289
290 spin_unlock(&ep->lock);
291 /* If a kSCIF accept is waiting wake it up */
292 wake_up_interruptible(&ep->conwq);
293 break;
294 }
295 }
296 scif_put_port(ep->port.port);
297 scif_anon_inode_fput(ep);
298 scif_teardown_ep(ep);
299 scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
300 return 0;
301}
302EXPORT_SYMBOL_GPL(scif_close);
303
304/**
305 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
306 * accept new connections.
307 * @epd: The end point returned from scif_open()
308 */
309int __scif_flush(scif_epd_t epd)
310{
311 struct scif_endpt *ep = (struct scif_endpt *)epd;
312
313 switch (ep->state) {
314 case SCIFEP_LISTENING:
315 {
316 ep->state = SCIFEP_CLLISTEN;
317
318 /* If an accept is waiting wake it up */
319 wake_up_interruptible(&ep->conwq);
320 break;
321 }
322 default:
323 break;
324 }
325 return 0;
326}
327
328int scif_bind(scif_epd_t epd, u16 pn)
329{
330 struct scif_endpt *ep = (struct scif_endpt *)epd;
331 int ret = 0;
332 int tmp;
333
334 dev_dbg(scif_info.mdev.this_device,
335 "SCIFAPI bind: ep %p %s requested port number %d\n",
336 ep, scif_ep_states[ep->state], pn);
337 if (pn) {
338 /*
339 * Similar to IETF RFC 1700, SCIF ports below
340 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
341 * processes or by processes executed by privileged users.
342 */
343 if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
344 ret = -EACCES;
345 goto scif_bind_admin_exit;
346 }
347 }
348
349 spin_lock(&ep->lock);
350 if (ep->state == SCIFEP_BOUND) {
351 ret = -EINVAL;
352 goto scif_bind_exit;
353 } else if (ep->state != SCIFEP_UNBOUND) {
354 ret = -EISCONN;
355 goto scif_bind_exit;
356 }
357
358 if (pn) {
359 tmp = scif_rsrv_port(pn);
360 if (tmp != pn) {
361 ret = -EINVAL;
362 goto scif_bind_exit;
363 }
364 } else {
365 ret = scif_get_new_port();
366 if (ret < 0)
367 goto scif_bind_exit;
368 pn = ret;
369 }
370
371 ep->state = SCIFEP_BOUND;
372 ep->port.node = scif_info.nodeid;
373 ep->port.port = pn;
374 ep->conn_async_state = ASYNC_CONN_IDLE;
375 ret = pn;
376 dev_dbg(scif_info.mdev.this_device,
377 "SCIFAPI bind: bound to port number %d\n", pn);
378scif_bind_exit:
379 spin_unlock(&ep->lock);
380scif_bind_admin_exit:
381 return ret;
382}
383EXPORT_SYMBOL_GPL(scif_bind);
384
385int scif_listen(scif_epd_t epd, int backlog)
386{
387 struct scif_endpt *ep = (struct scif_endpt *)epd;
388
389 dev_dbg(scif_info.mdev.this_device,
390 "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
391 spin_lock(&ep->lock);
392 switch (ep->state) {
393 case SCIFEP_ZOMBIE:
394 case SCIFEP_CLOSING:
395 case SCIFEP_CLLISTEN:
396 case SCIFEP_UNBOUND:
397 case SCIFEP_DISCONNECTED:
398 spin_unlock(&ep->lock);
399 return -EINVAL;
400 case SCIFEP_LISTENING:
401 case SCIFEP_CONNECTED:
402 case SCIFEP_CONNECTING:
403 case SCIFEP_MAPPING:
404 spin_unlock(&ep->lock);
405 return -EISCONN;
406 case SCIFEP_BOUND:
407 break;
408 }
409
410 ep->state = SCIFEP_LISTENING;
411 ep->backlog = backlog;
412
413 ep->conreqcnt = 0;
414 ep->acceptcnt = 0;
415 INIT_LIST_HEAD(&ep->conlist);
416 init_waitqueue_head(&ep->conwq);
417 INIT_LIST_HEAD(&ep->li_accept);
418 spin_unlock(&ep->lock);
419
420 /*
421 * Listen setup is complete, so delete the qp information that is not
422 * needed on a listen before placing the ep on the list of listening ep's
423 */
424 scif_teardown_ep(ep);
425 ep->qp_info.qp = NULL;
426
427 mutex_lock(&scif_info.eplock);
428 list_add_tail(&ep->list, &scif_info.listen);
429 mutex_unlock(&scif_info.eplock);
430 return 0;
431}
432EXPORT_SYMBOL_GPL(scif_listen);
433
434/*
435 ************************************************************************
436 * SCIF connection flow:
437 *
438 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
439 * connections via a SCIF_CNCT_REQ message
440 * 2) A SCIF endpoint can initiate a SCIF connection by calling
441 * scif_connect(..) which calls scif_setup_qp_connect(..) which
442 * allocates the local qp for the endpoint ring buffer and then sends
443 * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
444 * a SCIF_CNCT_REJ message
445 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
446 * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
447 * message otherwise
448 * 4) A thread blocked waiting for incoming connections allocates its local
449 * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
450 * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
451 * the node sends a SCIF_CNCT_REJ message
452 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
453 * connecting endpoint is woken up as part of handling
454 * scif_cnctgnt_resp(..) following which it maps the remote endpoints'
455 * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
456 * success or a SCIF_CNCT_GNTNACK message on failure and completes
457 * the scif_connect(..) API
458 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
459 * in step 4 is woken up and completes the scif_accept(..) API
460 * 7) The SCIF connection is now established between the two SCIF endpoints.
461 */
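/*
 * Illustrative kernel-mode use of the flow above (a minimal sketch; the
 * node id, port numbers and error handling are assumptions, not part of
 * this driver):
 *
 *	// connecting side
 *	struct scif_port_id dst = { .node = 1, .port = 2049 };
 *	scif_epd_t epd = scif_open();
 *	if (!epd)
 *		return -ENOMEM;
 *	if (scif_bind(epd, 0) < 0 || scif_connect(epd, &dst) < 0)
 *		goto out_close;
 *
 *	// accepting side
 *	struct scif_port_id peer;
 *	scif_epd_t lepd = scif_open(), nepd;
 *	scif_bind(lepd, 2049);
 *	scif_listen(lepd, 16);
 *	scif_accept(lepd, &peer, &nepd, SCIF_ACCEPT_SYNC);
 */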
462static int scif_conn_func(struct scif_endpt *ep)
463{
464 int err = 0;
465 struct scifmsg msg;
466 struct device *spdev;
467
468 err = scif_reserve_dma_chan(ep);
469 if (err) {
470 dev_err(&ep->remote_dev->sdev->dev,
471 "%s %d err %d\n", __func__, __LINE__, err);
472 ep->state = SCIFEP_BOUND;
473 goto connect_error_simple;
474 }
475 /* Initiate the first part of the endpoint QP setup */
476 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
477 SCIF_ENDPT_QP_SIZE, ep->remote_dev);
478 if (err) {
479 dev_err(&ep->remote_dev->sdev->dev,
480 "%s err %d qp_offset 0x%llx\n",
481 __func__, err, ep->qp_info.qp_offset);
482 ep->state = SCIFEP_BOUND;
483 goto connect_error_simple;
484 }
485
486 spdev = scif_get_peer_dev(ep->remote_dev);
487 if (IS_ERR(spdev)) {
488 err = PTR_ERR(spdev);
489 goto cleanup_qp;
490 }
491 /* Format connect message and send it */
492 msg.src = ep->port;
493 msg.dst = ep->conn_port;
494 msg.uop = SCIF_CNCT_REQ;
495 msg.payload[0] = (u64)ep;
496 msg.payload[1] = ep->qp_info.qp_offset;
497 err = _scif_nodeqp_send(ep->remote_dev, &msg);
498 if (err)
499 goto connect_error_dec;
500 scif_put_peer_dev(spdev);
501 /*
502 * Wait for the remote node to respond with SCIF_CNCT_GNT or
503 * SCIF_CNCT_REJ message.
504 */
505 err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
506 SCIF_NODE_ALIVE_TIMEOUT);
507 if (!err) {
508 dev_err(&ep->remote_dev->sdev->dev,
509 "%s %d timeout\n", __func__, __LINE__);
510 ep->state = SCIFEP_BOUND;
511 }
512 spdev = scif_get_peer_dev(ep->remote_dev);
513 if (IS_ERR(spdev)) {
514 err = PTR_ERR(spdev);
515 goto cleanup_qp;
516 }
517 if (ep->state == SCIFEP_MAPPING) {
518 err = scif_setup_qp_connect_response(ep->remote_dev,
519 ep->qp_info.qp,
520 ep->qp_info.gnt_pld);
521 /*
522 * If the resources to map the queue are not available then
523 * we need to tell the other side to terminate the accept
524 */
525 if (err) {
526 dev_err(&ep->remote_dev->sdev->dev,
527 "%s %d err %d\n", __func__, __LINE__, err);
528 msg.uop = SCIF_CNCT_GNTNACK;
529 msg.payload[0] = ep->remote_ep;
530 _scif_nodeqp_send(ep->remote_dev, &msg);
531 ep->state = SCIFEP_BOUND;
532 goto connect_error_dec;
533 }
534
535 msg.uop = SCIF_CNCT_GNTACK;
536 msg.payload[0] = ep->remote_ep;
537 err = _scif_nodeqp_send(ep->remote_dev, &msg);
538 if (err) {
539 ep->state = SCIFEP_BOUND;
540 goto connect_error_dec;
541 }
542 ep->state = SCIFEP_CONNECTED;
543 mutex_lock(&scif_info.connlock);
544 list_add_tail(&ep->list, &scif_info.connected);
545 mutex_unlock(&scif_info.connlock);
546 dev_dbg(&ep->remote_dev->sdev->dev,
547 "SCIFAPI connect: ep %p connected\n", ep);
548 } else if (ep->state == SCIFEP_BOUND) {
549 dev_dbg(&ep->remote_dev->sdev->dev,
550 "SCIFAPI connect: ep %p connection refused\n", ep);
551 err = -ECONNREFUSED;
552 goto connect_error_dec;
553 }
554 scif_put_peer_dev(spdev);
555 return err;
556connect_error_dec:
557 scif_put_peer_dev(spdev);
558cleanup_qp:
559 scif_cleanup_ep_qp(ep);
560connect_error_simple:
561 return err;
562}
563
564/*
565 * scif_conn_handler:
566 *
567 * Workqueue handler for servicing non-blocking SCIF connect
568 *
569 */
570void scif_conn_handler(struct work_struct *work)
571{
572 struct scif_endpt *ep;
573
574 do {
575 ep = NULL;
576 spin_lock(&scif_info.nb_connect_lock);
577 if (!list_empty(&scif_info.nb_connect_list)) {
578 ep = list_first_entry(&scif_info.nb_connect_list,
579 struct scif_endpt, conn_list);
580 list_del(&ep->conn_list);
581 }
582 spin_unlock(&scif_info.nb_connect_lock);
583 if (ep) {
584 ep->conn_err = scif_conn_func(ep);
585 wake_up_interruptible(&ep->conn_pend_wq);
586 }
587 } while (ep);
588}
589
590int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
591{
592 struct scif_endpt *ep = (struct scif_endpt *)epd;
593 int err = 0;
594 struct scif_dev *remote_dev;
595 struct device *spdev;
596
597 dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
598 scif_ep_states[ep->state]);
599
600 if (!scif_dev || dst->node > scif_info.maxid)
601 return -ENODEV;
602
603 might_sleep();
604
605 remote_dev = &scif_dev[dst->node];
606 spdev = scif_get_peer_dev(remote_dev);
607 if (IS_ERR(spdev)) {
608 err = PTR_ERR(spdev);
609 return err;
610 }
611
612 spin_lock(&ep->lock);
613 switch (ep->state) {
614 case SCIFEP_ZOMBIE:
615 case SCIFEP_CLOSING:
616 err = -EINVAL;
617 break;
618 case SCIFEP_DISCONNECTED:
619 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
620 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
621 else
622 err = -EINVAL;
623 break;
624 case SCIFEP_LISTENING:
625 case SCIFEP_CLLISTEN:
626 err = -EOPNOTSUPP;
627 break;
628 case SCIFEP_CONNECTING:
629 case SCIFEP_MAPPING:
630 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
631 err = -EINPROGRESS;
632 else
633 err = -EISCONN;
634 break;
635 case SCIFEP_CONNECTED:
636 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
637 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
638 else
639 err = -EISCONN;
640 break;
641 case SCIFEP_UNBOUND:
642 err = scif_get_new_port();
643 if (err < 0)
644 break;
645 ep->port.port = err;
646 ep->port.node = scif_info.nodeid;
647 ep->conn_async_state = ASYNC_CONN_IDLE;
648 /* Fall through */
649 case SCIFEP_BOUND:
650 /*
651 * If a non-blocking connect has been already initiated
652 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
653 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
654 * SCIFEP_BOUND due to an error in the connection process
655 * (e.g., connection refused). If conn_async_state is
656 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
657 * so that the error status can be collected. If the state is
658 * already ASYNC_CONN_FLUSH_WORK - then set the error to
659 * EINPROGRESS since some other thread is waiting to collect
660 * error status.
661 */
662 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
663 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
664 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
665 err = -EINPROGRESS;
666 } else {
667 ep->conn_port = *dst;
668 init_waitqueue_head(&ep->sendwq);
669 init_waitqueue_head(&ep->recvwq);
670 init_waitqueue_head(&ep->conwq);
671 ep->conn_async_state = 0;
672
673 if (unlikely(non_block))
674 ep->conn_async_state = ASYNC_CONN_INPROGRESS;
675 }
676 break;
677 }
678
679 if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
680 goto connect_simple_unlock1;
681
682 ep->state = SCIFEP_CONNECTING;
683 ep->remote_dev = &scif_dev[dst->node];
684 ep->qp_info.qp->magic = SCIFEP_MAGIC;
685 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
686 init_waitqueue_head(&ep->conn_pend_wq);
687 spin_lock(&scif_info.nb_connect_lock);
688 list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
689 spin_unlock(&scif_info.nb_connect_lock);
690 err = -EINPROGRESS;
691 schedule_work(&scif_info.conn_work);
692 }
693connect_simple_unlock1:
694 spin_unlock(&ep->lock);
695 scif_put_peer_dev(spdev);
696 if (err) {
697 return err;
698 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
699 flush_work(&scif_info.conn_work);
700 err = ep->conn_err;
701 spin_lock(&ep->lock);
702 ep->conn_async_state = ASYNC_CONN_IDLE;
703 spin_unlock(&ep->lock);
704 } else {
705 err = scif_conn_func(ep);
706 }
707 return err;
708}
709
710int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
711{
712 return __scif_connect(epd, dst, false);
713}
714EXPORT_SYMBOL_GPL(scif_connect);
715
716/**
717 * scif_accept() - Accept a connection request from the remote node
718 *
719 * The function accepts a connection request from the remote node. Successful
720 * completion is indicated by a new end point being created and passed back
721 * to the caller for future reference.
722 *
723 * Upon successful completion zero will be returned and the peer information
724 * will be filled in.
725 *
726 * If the end point is not in the listening state -EINVAL will be returned.
727 *
728 * If resource allocation fails during the connection sequence, -ENOMEM
729 * will be returned.
730 *
731 * If the function is called with the ASYNC flag set and no connection requests
732 * are pending it will return -EAGAIN.
733 *
734 * If the remote side is not sending any connection requests the caller may
735 * terminate this function with a signal. If so, -EINTR will be returned.
736 */
737int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
738 scif_epd_t *newepd, int flags)
739{
740 struct scif_endpt *lep = (struct scif_endpt *)epd;
741 struct scif_endpt *cep;
742 struct scif_conreq *conreq;
743 struct scifmsg msg;
744 int err;
745 struct device *spdev;
746
747 dev_dbg(scif_info.mdev.this_device,
748 "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
749
750 if (flags & ~SCIF_ACCEPT_SYNC)
751 return -EINVAL;
752
753 if (!peer || !newepd)
754 return -EINVAL;
755
756 might_sleep();
757 spin_lock(&lep->lock);
758 if (lep->state != SCIFEP_LISTENING) {
759 spin_unlock(&lep->lock);
760 return -EINVAL;
761 }
762
763 if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
764 /* No connection request present and we do not want to wait */
765 spin_unlock(&lep->lock);
766 return -EAGAIN;
767 }
768
769 lep->files = current->files;
770retry_connection:
771 spin_unlock(&lep->lock);
772 /* Wait for the remote node to send us a SCIF_CNCT_REQ */
773 err = wait_event_interruptible(lep->conwq,
774 (lep->conreqcnt ||
775 (lep->state != SCIFEP_LISTENING)));
776 if (err)
777 return err;
778
779 if (lep->state != SCIFEP_LISTENING)
780 return -EINTR;
781
782 spin_lock(&lep->lock);
783
784 if (!lep->conreqcnt)
785 goto retry_connection;
786
787 /* Get the first connect request off the list */
788 conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
789 list_del(&conreq->list);
790 lep->conreqcnt--;
791 spin_unlock(&lep->lock);
792
793 /* Fill in the peer information */
794 peer->node = conreq->msg.src.node;
795 peer->port = conreq->msg.src.port;
796
797 cep = kzalloc(sizeof(*cep), GFP_KERNEL);
798 if (!cep) {
799 err = -ENOMEM;
800 goto scif_accept_error_epalloc;
801 }
802 spin_lock_init(&cep->lock);
803 mutex_init(&cep->sendlock);
804 mutex_init(&cep->recvlock);
805 cep->state = SCIFEP_CONNECTING;
806 cep->remote_dev = &scif_dev[peer->node];
807 cep->remote_ep = conreq->msg.payload[0];
808
809 scif_rma_ep_init(cep);
810
811 err = scif_reserve_dma_chan(cep);
812 if (err) {
813 dev_err(scif_info.mdev.this_device,
814 "%s %d err %d\n", __func__, __LINE__, err);
815 goto scif_accept_error_qpalloc;
816 }
817
818 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
819 if (!cep->qp_info.qp) {
820 err = -ENOMEM;
821 goto scif_accept_error_qpalloc;
822 }
823
824 err = scif_anon_inode_getfile(cep);
825 if (err)
826 goto scif_accept_error_anon_inode;
827
828 cep->qp_info.qp->magic = SCIFEP_MAGIC;
829 spdev = scif_get_peer_dev(cep->remote_dev);
830 if (IS_ERR(spdev)) {
831 err = PTR_ERR(spdev);
832 goto scif_accept_error_map;
833 }
834 err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
835 conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
836 cep->remote_dev);
837 if (err) {
838 dev_dbg(&cep->remote_dev->sdev->dev,
839 "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
840 lep, cep, err, cep->qp_info.qp_offset);
841 scif_put_peer_dev(spdev);
842 goto scif_accept_error_map;
843 }
844
845 cep->port.node = lep->port.node;
846 cep->port.port = lep->port.port;
847 cep->peer.node = peer->node;
848 cep->peer.port = peer->port;
849 init_waitqueue_head(&cep->sendwq);
850 init_waitqueue_head(&cep->recvwq);
851 init_waitqueue_head(&cep->conwq);
852
853 msg.uop = SCIF_CNCT_GNT;
854 msg.src = cep->port;
855 msg.payload[0] = cep->remote_ep;
856 msg.payload[1] = cep->qp_info.qp_offset;
857 msg.payload[2] = (u64)cep;
858
859 err = _scif_nodeqp_send(cep->remote_dev, &msg);
860 scif_put_peer_dev(spdev);
861 if (err)
862 goto scif_accept_error_map;
863retry:
864 /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
865 err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
866 SCIF_NODE_ACCEPT_TIMEOUT);
867 if (!err && scifdev_alive(cep))
868 goto retry;
869 err = !err ? -ENODEV : 0;
870 if (err)
871 goto scif_accept_error_map;
872 kfree(conreq);
873
874 spin_lock(&cep->lock);
875
876 if (cep->state == SCIFEP_CLOSING) {
877 /*
878 * Remote failed to allocate resources and NAKed the grant.
879 * There is at this point nothing referencing the new end point.
880 */
881 spin_unlock(&cep->lock);
882 scif_teardown_ep(cep);
883 kfree(cep);
884
885 /* If called with the sync flag then go back and wait. */
886 if (flags & SCIF_ACCEPT_SYNC) {
887 spin_lock(&lep->lock);
888 goto retry_connection;
889 }
890 return -EAGAIN;
891 }
892
893 scif_get_port(cep->port.port);
894 *newepd = (scif_epd_t)cep;
895 spin_unlock(&cep->lock);
896 return 0;
897scif_accept_error_map:
898 scif_anon_inode_fput(cep);
899scif_accept_error_anon_inode:
900 scif_teardown_ep(cep);
901scif_accept_error_qpalloc:
902 kfree(cep);
903scif_accept_error_epalloc:
904 msg.uop = SCIF_CNCT_REJ;
905 msg.dst.node = conreq->msg.src.node;
906 msg.dst.port = conreq->msg.src.port;
907 msg.payload[0] = conreq->msg.payload[0];
908 msg.payload[1] = conreq->msg.payload[1];
909 scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
910 kfree(conreq);
911 return err;
912}
913EXPORT_SYMBOL_GPL(scif_accept);
914
915/*
916 * scif_msg_param_check:
917 * @epd: The end point returned from scif_open()
918 * @len: Length to send or receive
919 * @flags: blocking or non blocking
920 *
921 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
922 */
923static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
924{
925 int ret = -EINVAL;
926
927 if (len < 0)
928 goto err_ret;
929 if (flags && (!(flags & SCIF_RECV_BLOCK)))
930 goto err_ret;
931 ret = 0;
932err_ret:
933 return ret;
934}
935
936static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
937{
938 struct scif_endpt *ep = (struct scif_endpt *)epd;
939 struct scifmsg notif_msg;
940 int curr_xfer_len = 0, sent_len = 0, write_count;
941 int ret = 0;
942 struct scif_qp *qp = ep->qp_info.qp;
943
944 if (flags & SCIF_SEND_BLOCK)
945 might_sleep();
946
947 spin_lock(&ep->lock);
948 while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
949 write_count = scif_rb_space(&qp->outbound_q);
950 if (write_count) {
951 /* Best effort to send as much data as possible */
952 curr_xfer_len = min(len - sent_len, write_count);
953 ret = scif_rb_write(&qp->outbound_q, msg,
954 curr_xfer_len);
955 if (ret < 0)
956 break;
957 /* Success. Update write pointer */
958 scif_rb_commit(&qp->outbound_q);
959 /*
960 * Send a notification to the peer about the
961 * produced data message.
962 */
963 notif_msg.src = ep->port;
964 notif_msg.uop = SCIF_CLIENT_SENT;
965 notif_msg.payload[0] = ep->remote_ep;
966 ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
967 if (ret)
968 break;
969 sent_len += curr_xfer_len;
970 msg = msg + curr_xfer_len;
971 continue;
972 }
973 curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
974 /* Not enough RB space. return for the Non Blocking case */
975 if (!(flags & SCIF_SEND_BLOCK))
976 break;
977
978 spin_unlock(&ep->lock);
979 /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
980 ret =
981 wait_event_interruptible(ep->sendwq,
982 (SCIFEP_CONNECTED != ep->state) ||
983 (scif_rb_space(&qp->outbound_q) >=
984 curr_xfer_len));
985 spin_lock(&ep->lock);
986 if (ret)
987 break;
988 }
989 if (sent_len)
990 ret = sent_len;
991 else if (!ret && SCIFEP_CONNECTED != ep->state)
992 ret = SCIFEP_DISCONNECTED == ep->state ?
993 -ECONNRESET : -ENOTCONN;
994 spin_unlock(&ep->lock);
995 return ret;
996}
997
998static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
999{
1000 int read_size;
1001 struct scif_endpt *ep = (struct scif_endpt *)epd;
1002 struct scifmsg notif_msg;
1003 int curr_recv_len = 0, remaining_len = len, read_count;
1004 int ret = 0;
1005 struct scif_qp *qp = ep->qp_info.qp;
1006
1007 if (flags & SCIF_RECV_BLOCK)
1008 might_sleep();
1009 spin_lock(&ep->lock);
1010 while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1011 SCIFEP_DISCONNECTED == ep->state)) {
1012 read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1013 if (read_count) {
1014 /*
1015 * Best effort to recv as much data as there
1016 * are bytes to read in the RB particularly
1017 * important for the Non Blocking case.
1018 */
1019 curr_recv_len = min(remaining_len, read_count);
1020 read_size = scif_rb_get_next(&qp->inbound_q,
1021 msg, curr_recv_len);
1022 if (ep->state == SCIFEP_CONNECTED) {
1023 /*
1024 * Update the read pointer only if the endpoint
1025 * is still connected else the read pointer
1026 * might no longer exist since the peer has
1027 * freed resources!
1028 */
1029 scif_rb_update_read_ptr(&qp->inbound_q);
1030 /*
1031 * Send a notification to the peer about the
1032 * consumed data message only if the EP is in
1033 * SCIFEP_CONNECTED state.
1034 */
1035 notif_msg.src = ep->port;
1036 notif_msg.uop = SCIF_CLIENT_RCVD;
1037 notif_msg.payload[0] = ep->remote_ep;
1038 ret = _scif_nodeqp_send(ep->remote_dev,
1039 &notif_msg);
1040 if (ret)
1041 break;
1042 }
1043 remaining_len -= curr_recv_len;
1044 msg = msg + curr_recv_len;
1045 continue;
1046 }
1047 /*
1048 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1049 * we will keep looping forever.
1050 */
1051 if (ep->state == SCIFEP_DISCONNECTED)
1052 break;
1053 /*
1054 * Return in the Non Blocking case if there is no data
1055 * to read in this iteration.
1056 */
1057 if (!(flags & SCIF_RECV_BLOCK))
1058 break;
1059 curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1060 spin_unlock(&ep->lock);
1061 /*
1062 * Wait for a SCIF_CLIENT_SENT message in the blocking case
1063 * or until the other side disconnects.
1064 */
1065 ret =
1066 wait_event_interruptible(ep->recvwq,
1067 SCIFEP_CONNECTED != ep->state ||
1068 scif_rb_count(&qp->inbound_q,
1069 curr_recv_len)
1070 >= curr_recv_len);
1071 spin_lock(&ep->lock);
1072 if (ret)
1073 break;
1074 }
1075 if (len - remaining_len)
1076 ret = len - remaining_len;
1077 else if (!ret && ep->state != SCIFEP_CONNECTED)
1078 ret = ep->state == SCIFEP_DISCONNECTED ?
1079 -ECONNRESET : -ENOTCONN;
1080 spin_unlock(&ep->lock);
1081 return ret;
1082}
1083
1084/**
1085 * scif_user_send() - Send data to connection queue
1086 * @epd: The end point returned from scif_open()
1087 * @msg: Address of data to send
1088 * @len: Length of data to send
1089 * @flags: blocking or non blocking
1090 *
1091 * This function is called from the driver IOCTL entry point
1092 * only and is a wrapper for _scif_send().
1093 */
1094int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1095{
1096 struct scif_endpt *ep = (struct scif_endpt *)epd;
1097 int err = 0;
1098 int sent_len = 0;
1099 char *tmp;
1100 int loop_len;
1101 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1102
1103 dev_dbg(scif_info.mdev.this_device,
1104 "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1105 if (!len)
1106 return 0;
1107
1108 err = scif_msg_param_check(epd, len, flags);
1109 if (err)
1110 goto send_err;
1111
1112 tmp = kmalloc(chunk_len, GFP_KERNEL);
1113 if (!tmp) {
1114 err = -ENOMEM;
1115 goto send_err;
1116 }
1117 /*
1118 * Grabbing the lock before breaking up the transfer in
1119 * multiple chunks is required to ensure that messages do
1120 * not get fragmented and reordered.
1121 */
1122 mutex_lock(&ep->sendlock);
1123 while (sent_len != len) {
1124 loop_len = len - sent_len;
1125 loop_len = min(chunk_len, loop_len);
1126 if (copy_from_user(tmp, msg, loop_len)) {
1127 err = -EFAULT;
1128 goto send_free_err;
1129 }
1130 err = _scif_send(epd, tmp, loop_len, flags);
1131 if (err < 0)
1132 goto send_free_err;
1133 sent_len += err;
1134 msg += err;
1135 if (err != loop_len)
1136 goto send_free_err;
1137 }
1138send_free_err:
1139 mutex_unlock(&ep->sendlock);
1140 kfree(tmp);
1141send_err:
1142 return err < 0 ? err : sent_len;
1143}
1144
1145/**
1146 * scif_user_recv() - Receive data from connection queue
1147 * @epd: The end point returned from scif_open()
1148 * @msg: Address to place data
1149 * @len: Length to receive
1150 * @flags: blocking or non blocking
1151 *
1152 * This function is called from the driver IOCTL entry point
1153 * only and is a wrapper for _scif_recv().
1154 */
1155int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1156{
1157 struct scif_endpt *ep = (struct scif_endpt *)epd;
1158 int err = 0;
1159 int recv_len = 0;
1160 char *tmp;
1161 int loop_len;
1162 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1163
1164 dev_dbg(scif_info.mdev.this_device,
1165 "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1166 if (!len)
1167 return 0;
1168
1169 err = scif_msg_param_check(epd, len, flags);
1170 if (err)
1171 goto recv_err;
1172
1173 tmp = kmalloc(chunk_len, GFP_KERNEL);
1174 if (!tmp) {
1175 err = -ENOMEM;
1176 goto recv_err;
1177 }
1178 /*
1179 * Grabbing the lock before breaking up the transfer in
1180 * multiple chunks is required to ensure that messages do
1181 * not get fragmented and reordered.
1182 */
1183 mutex_lock(&ep->recvlock);
1184 while (recv_len != len) {
1185 loop_len = len - recv_len;
1186 loop_len = min(chunk_len, loop_len);
1187 err = _scif_recv(epd, tmp, loop_len, flags);
1188 if (err < 0)
1189 goto recv_free_err;
1190 if (copy_to_user(msg, tmp, err)) {
1191 err = -EFAULT;
1192 goto recv_free_err;
1193 }
1194 recv_len += err;
1195 msg += err;
1196 if (err != loop_len)
1197 goto recv_free_err;
1198 }
1199recv_free_err:
1200 mutex_unlock(&ep->recvlock);
1201 kfree(tmp);
1202recv_err:
1203 return err < 0 ? err : recv_len;
1204}
1205
1206/**
1207 * scif_send() - Send data to connection queue
1208 * @epd: The end point returned from scif_open()
1209 * @msg: Address of data to send
1210 * @len: Length of data to send
1211 * @flags: blocking or non blocking
1212 *
1213 * This function is called from the kernel mode only and is
1214 * a wrapper for _scif_send().
1215 */
1216int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1217{
1218 struct scif_endpt *ep = (struct scif_endpt *)epd;
1219 int ret;
1220
1221 dev_dbg(scif_info.mdev.this_device,
1222 "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1223 if (!len)
1224 return 0;
1225
1226 ret = scif_msg_param_check(epd, len, flags);
1227 if (ret)
1228 return ret;
1229 if (!ep->remote_dev)
1230 return -ENOTCONN;
1231 /*
1232 * Grab the mutex lock in the blocking case only
1233 * to ensure messages do not get fragmented/reordered.
1234 * The non blocking mode is protected using spin locks
1235 * in _scif_send().
1236 */
1237 if (flags & SCIF_SEND_BLOCK)
1238 mutex_lock(&ep->sendlock);
1239
1240 ret = _scif_send(epd, msg, len, flags);
1241
1242 if (flags & SCIF_SEND_BLOCK)
1243 mutex_unlock(&ep->sendlock);
1244 return ret;
1245}
1246EXPORT_SYMBOL_GPL(scif_send);
1247
1248/**
1249 * scif_recv() - Receive data from connection queue
1250 * @epd: The end point returned from scif_open()
1251 * @msg: Address to place data
1252 * @len: Length to receive
1253 * @flags: blocking or non blocking
1254 *
1255 * This function is called from the kernel mode only and is
1256 * a wrapper for _scif_recv().
1257 */
1258int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1259{
1260 struct scif_endpt *ep = (struct scif_endpt *)epd;
1261 int ret;
1262
1263 dev_dbg(scif_info.mdev.this_device,
1264 "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1265 if (!len)
1266 return 0;
1267
1268 ret = scif_msg_param_check(epd, len, flags);
1269 if (ret)
1270 return ret;
1271 /*
1272 * Grab the mutex lock in the blocking case only
1273 * to ensure messages do not get fragmented/reordered.
1274 * The non blocking mode is protected using spin locks
1275 * in _scif_recv().
1276 */
1277 if (flags & SCIF_RECV_BLOCK)
1278 mutex_lock(&ep->recvlock);
1279
1280 ret = _scif_recv(epd, msg, len, flags);
1281
1282 if (flags & SCIF_RECV_BLOCK)
1283 mutex_unlock(&ep->recvlock);
1284
1285 return ret;
1286}
1287EXPORT_SYMBOL_GPL(scif_recv);
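/*
 * Illustrative kernel-mode send/recv on a connected endpoint (a minimal
 * sketch; the buffer, its size and the error handling are assumptions):
 *
 *	char buf[64];
 *	int ret;
 *
 *	ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
 *	if (ret < 0)
 *		return ret;	// e.g. -ENOTCONN or -ECONNRESET
 *	ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
 *
 * With the BLOCK flags the full length is transferred on success; without
 * them a short (possibly zero) transfer may be returned.
 */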
1288
1289static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1290 poll_table *p, struct scif_endpt *ep)
1291{
1292 /*
1293 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1294 * and regrab it afterwards. Because the endpoint state might have
1295 * changed while the lock was given up, the state must be checked
1296 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1297 * does this.
1298 */
1299 spin_unlock(&ep->lock);
1300 poll_wait(f, wq, p);
1301 spin_lock(&ep->lock);
1302}
1303
1304__poll_t
1305__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1306{
1307 __poll_t mask = 0;
1308
1309 dev_dbg(scif_info.mdev.this_device,
1310 "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1311
1312 spin_lock(&ep->lock);
1313
1314 /* Endpoint is waiting for a non-blocking connect to complete */
1315 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1316 _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1317 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1318 if (ep->state == SCIFEP_CONNECTED ||
1319 ep->state == SCIFEP_DISCONNECTED ||
1320 ep->conn_err)
1321 mask |= EPOLLOUT;
1322 goto exit;
1323 }
1324 }
1325
1326 /* Endpoint is listening for incoming connection requests */
1327 if (ep->state == SCIFEP_LISTENING) {
1328 _scif_poll_wait(f, &ep->conwq, wait, ep);
1329 if (ep->state == SCIFEP_LISTENING) {
1330 if (ep->conreqcnt)
1331 mask |= EPOLLIN;
1332 goto exit;
1333 }
1334 }
1335
1336 /* Endpoint is connected or disconnected */
1337 if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1338 if (poll_requested_events(wait) & EPOLLIN)
1339 _scif_poll_wait(f, &ep->recvwq, wait, ep);
1340 if (poll_requested_events(wait) & EPOLLOUT)
1341 _scif_poll_wait(f, &ep->sendwq, wait, ep);
1342 if (ep->state == SCIFEP_CONNECTED ||
1343 ep->state == SCIFEP_DISCONNECTED) {
1344 /* Data can be read without blocking */
1345 if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1346 mask |= EPOLLIN;
1347 /* Data can be written without blocking */
1348 if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1349 mask |= EPOLLOUT;
1350 /* Return EPOLLHUP if endpoint is disconnected */
1351 if (ep->state == SCIFEP_DISCONNECTED)
1352 mask |= EPOLLHUP;
1353 goto exit;
1354 }
1355 }
1356
1357 /* Return EPOLLERR if the endpoint is in none of the above states */
1358 mask |= EPOLLERR;
1359exit:
1360 spin_unlock(&ep->lock);
1361 return mask;
1362}
1363
1364/**
1365 * scif_poll() - Kernel mode SCIF poll
1366 * @ufds: Array of scif_pollepd structures containing the end points
1367 * and events to poll on
1368 * @nfds: Size of the ufds array
1369 * @timeout_msecs: Timeout in msecs; a negative value implies an infinite timeout
1370 *
1371 * The code flow in this function is based on do_poll(..) in select.c
1372 *
1373 * Returns the number of endpoints which have pending events or 0 in
1374 * the event of a timeout. If a signal is used for wake up, -EINTR is
1375 * returned.
1376 */
1377int
1378scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1379{
1380 struct poll_wqueues table;
1381 poll_table *pt;
1382 int i, count = 0, timed_out = timeout_msecs == 0;
1383 __poll_t mask;
b7f94441
AD
1384 u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1385 : msecs_to_jiffies(timeout_msecs);
1386
1387 poll_initwait(&table);
1388 pt = &table.pt;
1389 while (1) {
1390 for (i = 0; i < nfds; i++) {
1391 pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
1392 mask = __scif_pollfd(ufds[i].epd->anon,
1393 pt, ufds[i].epd);
1394 mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
1395 if (mask) {
1396 count++;
1397 pt->_qproc = NULL;
1398 }
1399 ufds[i].revents = mask;
1400 }
1401 pt->_qproc = NULL;
1402 if (!count) {
1403 count = table.error;
1404 if (signal_pending(current))
1405 count = -EINTR;
1406 }
1407 if (count || timed_out)
1408 break;
1409
1410 if (!schedule_timeout_interruptible(timeout))
1411 timed_out = 1;
1412 }
1413 poll_freewait(&table);
1414 return count;
1415}
1416EXPORT_SYMBOL_GPL(scif_poll);
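/*
 * Illustrative use of scif_poll() from a kernel client (a minimal sketch;
 * the endpoint variable and the timeout value are assumptions):
 *
 *	struct scif_pollepd pfd = { .epd = epd, .events = EPOLLIN };
 *	int n = scif_poll(&pfd, 1, 1000);	// wait up to one second
 *
 *	if (n > 0 && (pfd.revents & EPOLLIN))
 *		;	// data can be read without blocking
 */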
1417
1418int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1419{
1420 int online = 0;
1421 int offset = 0;
1422 int node;
1423
1424 if (!scif_is_mgmt_node())
1425 scif_get_node_info();
1426
1427 *self = scif_info.nodeid;
1428 mutex_lock(&scif_info.conflock);
1429 len = min_t(int, len, scif_info.total);
1430 for (node = 0; node <= scif_info.maxid; node++) {
1431 if (_scifdev_alive(&scif_dev[node])) {
1432 online++;
1433 if (offset < len)
1434 nodes[offset++] = node;
1435 }
1436 }
1437 dev_dbg(scif_info.mdev.this_device,
1438 "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1439 scif_info.total, online, offset);
1440 mutex_unlock(&scif_info.conflock);
1441
1442 return online;
1443}
1444EXPORT_SYMBOL_GPL(scif_get_node_ids);
1445
1446static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1447{
1448 struct scif_client *client =
1449 container_of(si, struct scif_client, si);
1450 struct scif_peer_dev *spdev =
1451 container_of(dev, struct scif_peer_dev, dev);
1452
1453 if (client->probe)
1454 client->probe(spdev);
1455 return 0;
1456}
1457
1458static void scif_remove_client_dev(struct device *dev,
1459 struct subsys_interface *si)
1460{
1461 struct scif_client *client =
1462 container_of(si, struct scif_client, si);
1463 struct scif_peer_dev *spdev =
1464 container_of(dev, struct scif_peer_dev, dev);
1465
1466 if (client->remove)
1467 client->remove(spdev);
1468}
1469
1470void scif_client_unregister(struct scif_client *client)
1471{
1472 subsys_interface_unregister(&client->si);
1473}
1474EXPORT_SYMBOL_GPL(scif_client_unregister);
1475
1476int scif_client_register(struct scif_client *client)
1477{
1478 struct subsys_interface *si = &client->si;
1479
1480 si->name = client->name;
1481 si->subsys = &scif_peer_bus;
1482 si->add_dev = scif_add_client_dev;
1483 si->remove_dev = scif_remove_client_dev;
1484
1485 return subsys_interface_register(&client->si);
1486}
1487EXPORT_SYMBOL_GPL(scif_client_register);
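/*
 * Illustrative SCIF client registration (a minimal sketch; the client name
 * and the empty probe/remove bodies are assumptions):
 *
 *	static void my_probe(struct scif_peer_dev *spdev) { }
 *	static void my_remove(struct scif_peer_dev *spdev) { }
 *
 *	static struct scif_client my_client = {
 *		.name	= "my_scif_client",
 *		.probe	= my_probe,
 *		.remove	= my_remove,
 *	};
 *
 *	// at module init / exit:
 *	scif_client_register(&my_client);
 *	scif_client_unregister(&my_client);
 */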