1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
|
/*
* This file implements the lower socket layer of VFS: communication with
* socket drivers. Socket driver communication evolved out of character driver
* communication, and the two have many similarities between them. Most
* importantly, socket driver communication also has the distinction between
* short-lived and long-lived requests.
*
* Short-lived requests are expected to be replied to by the socket driver
* immediately in all cases. For such requests, VFS keeps the worker thread
* for the calling process alive until the reply arrives. In contrast,
* long-lived requests may block. For such requests, VFS suspends the calling
* process until a reply comes in, or until a signal interrupts the request.
* Both short-lived and long-lived requests may be aborted if VFS finds that
* the corresponding socket driver has died. Even though long-lived requests
* may be marked as nonblocking, nonblocking calls are still handled as
* long-lived in terms of VFS processing.
*
* For an overview of the socket driver requests and replies, message layouts,
* and which requests are long-lived or short-lived (i.e. may suspend or not),
* please refer to the corresponding table in the libsockdriver source code.
*
* For most long-lived socket requests, the main VFS thread processes the reply
* from the socket driver. This typically consists of waking up the user
* process that originally issued the system call on the socket by simply
* relaying the call's result code. Some socket calls require a specific reply
* message and/or additional post-call actions; for those, resume_*() calls are
* made back into the upper socket layer.
*
* If a process is interrupted by a signal, any ongoing long-lived socket
* request must be canceled. This is done by sending a one-way cancel request
* to the socket driver, and waiting for it to reply to the original request.
* In this case, the reply will be processed from the worker thread that is
* handling the cancel operation. Canceling does not imply call failure: the
* cancellation may result in a partial I/O reply, and a successful reply may
* cross the cancel request.
*
* One main exception is the reply to an accept request. Once a connection has
* been accepted, a new socket has to be created for it. This requires actions
* that require the ability to block the current thread, and so, a worker
* thread is spawned for processing successful accept replies, unless the reply
* was received from a worker thread already (as may be the case if the accept
* request was being canceled).
*/
#include "fs.h"
#include <sys/socket.h>
#include <minix/callnr.h>
/*
 * Issue a short-lived request to the given socket driver and block the
 * current worker thread until a reply message has been received.  The reply
 * is stored in the message structure that 'm_ptr' points to, replacing the
 * request.  Return OK if that message now carries a socket driver reply type.
 * The call may fail if the socket driver dies before replying; in that case
 * the m_type field of the 'm_ptr' message holds a negative error code, and
 * that same code is returned.
 */
static int
sdev_sendrec(struct smap * sp, message * m_ptr)
{
	int rc;

	/* Hand the request to the driver; failing to send at all is fatal. */
	rc = asynsend3(sp->smap_endpt, m_ptr, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_sendrec failed: %d", rc);

	/*
	 * Record which driver this thread is waiting on and where the reply
	 * has to be stored, then put the thread to sleep.
	 */
	self->w_task = sp->smap_endpt;
	self->w_drv_sendrec = m_ptr;

	worker_wait();

	/* Awake again: the reply pointer is expected to have been reset. */
	self->w_task = NONE;
	assert(self->w_drv_sendrec == NULL);

	/* Anything that is not a socket driver reply type is an error code. */
	if (IS_SDEV_RS(m_ptr->m_type))
		return OK;

	return m_ptr->m_type;
}
/*
 * Suspend the current process so that its system call can be completed later,
 * once the socket driver replies.  The socket device, the number of the call
 * being processed, and up to three grants are saved in the process's fp_sdev
 * state.  In addition, accept calls save the given file descriptor and
 * recvmsg calls save the given buffer address; all other calls must pass
 * fd == -1 and buf == 0.  Always returns SUSPEND.
 */
int
sdev_suspend(dev_t dev, cp_grant_id_t grant0, cp_grant_id_t grant1,
	cp_grant_id_t grant2, int fd, vir_bytes buf)
{

	/* Remember what we are blocked on and which resources to free. */
	fp->fp_sdev.dev = dev;
	fp->fp_sdev.callnr = job_call_nr;
	fp->fp_sdev.grant[0] = grant0;
	fp->fp_sdev.grant[1] = grant1;
	fp->fp_sdev.grant[2] = grant2;

	/* Save the per-call auxiliary value, if this call has one. */
	switch (job_call_nr) {
	case VFS_ACCEPT:
		assert(fd != -1);
		assert(buf == 0);
		fp->fp_sdev.aux.fd = fd;
		break;

	case VFS_RECVMSG:
		assert(fd == -1);
		/*
		 * TODO: we are not yet consistent enough in dealing with
		 * mapped NULL pages to have an assert(buf != 0) here..
		 */
		fp->fp_sdev.aux.buf = buf;
		break;

	default:
		assert(fd == -1);
		assert(buf == 0);
		break;
	}

	suspend(FP_BLOCKED_ON_SDEV);

	return SUSPEND;
}
/*
 * Create a socket (pair == 0) or a pair of sockets (pair != 0) in the given
 * domain.  On success, return OK with the new socket device number(s) stored
 * in dev[0] and, for a pair, dev[1].  On failure, return an error code: the
 * error from sdev_sendrec() if the socket driver died, the negative socket ID
 * returned by the driver for regular errors, or EIO for protocol errors.
 */
int
sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair)
{
	struct smap *drv;
	message msg;
	sockid_t first_id, second_id;
	int rc;

	/* We could return EAFNOSUPPORT, but the caller should have checked. */
	drv = get_smap_by_domain(domain);
	if (drv == NULL)
		panic("VFS: sdev_socket for unknown domain");

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	if (pair)
		msg.m_type = SDEV_SOCKETPAIR;
	else
		msg.m_type = SDEV_SOCKET;
	msg.m_vfs_lsockdriver_socket.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_socket.domain = domain;
	msg.m_vfs_lsockdriver_socket.type = type;
	msg.m_vfs_lsockdriver_socket.protocol = protocol;
	msg.m_vfs_lsockdriver_socket.user_endpt = who_e;

	/* Perform the request, keeping this thread blocked meanwhile. */
	rc = sdev_sendrec(drv, &msg);
	if (rc != OK)
		return rc;	/* socket driver died */

	/* A reply of any other type is a protocol error. */
	if (msg.m_type != SDEV_SOCKET_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    drv->smap_endpt, msg.m_type, job_call_nr);
		return EIO;
	}

	first_id = msg.m_lsockdriver_vfs_socket_reply.sock_id;
	second_id = msg.m_lsockdriver_vfs_socket_reply.sock_id2;

	/* A negative first socket ID is a regular error code. */
	if (first_id < 0)
		return first_id;

	dev[0] = make_smap_dev(drv, first_id);

	if (!pair)
		return OK;

	/* Okay, one more protocol error: a pair needs a second valid ID. */
	if (second_id < 0) {
		printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n",
		    drv->smap_endpt, second_id);
		/* Do not leave the first socket behind. */
		(void)sdev_close(dev[0], FALSE /*may_suspend*/);
		return EIO;
	}

	dev[1] = make_smap_dev(drv, second_id);

	return OK;
}
/*
 * Shared implementation of bind and connect: send the given address to the
 * socket driver using request type 'type'.  These calls may block, so the
 * current process is suspended instead of making the thread wait for the
 * reply.  Return EIO if 'dev' does not map to a socket driver, or the result
 * of sdev_suspend() otherwise.
 */
static int
sdev_bindconn(dev_t dev, int type, vir_bytes addr, unsigned int addr_len,
	int filp_flags)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t addr_grant;
	message msg;
	int rc;

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* Let the driver read the address from the caller's memory. */
	addr_grant = cpf_grant_magic(drv->smap_endpt, who_e, addr, addr_len,
	    CPF_READ);
	if (!GRANT_VALID(addr_grant))
		panic("VFS: cpf_grant_magic failed");

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = type;
	msg.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_addr.sock_id = sid;
	msg.m_vfs_lsockdriver_addr.grant = addr_grant;
	msg.m_vfs_lsockdriver_addr.len = addr_len;
	msg.m_vfs_lsockdriver_addr.user_endpt = who_e;
	if (filp_flags & O_NONBLOCK)
		msg.m_vfs_lsockdriver_addr.sflags = SDEV_NONBLOCK;
	else
		msg.m_vfs_lsockdriver_addr.sflags = 0;

	/* Send the request; the reply is picked up asynchronously. */
	rc = asynsend3(drv->smap_endpt, &msg, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_bindconn failed: %d", rc);

	/* The grant is saved so that it can be revoked upon completion. */
	return sdev_suspend(dev, addr_grant, GRANT_INVALID, GRANT_INVALID,
	    -1, 0);
}
/*
 * Bind a socket to a local address.  This is sdev_bindconn() with the
 * SDEV_BIND request type; see there for the return value.
 */
int
sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
{
	const int req_type = SDEV_BIND;

	return sdev_bindconn(dev, req_type, addr, addr_len, filp_flags);
}
/*
 * Connect a socket to a remote address.  This is sdev_bindconn() with the
 * SDEV_CONNECT request type; see there for the return value.
 */
int
sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
{
	const int req_type = SDEV_CONNECT;

	return sdev_bindconn(dev, req_type, addr, addr_len, filp_flags);
}
/*
 * Send a "simple" request (listen, shutdown, or close) carrying one integer
 * parameter, and keep the calling thread blocked until the reply arrives.
 * Return the status from the driver's reply, the error from sdev_sendrec() if
 * the socket driver died, or EIO if the device is unknown or the reply has an
 * unexpected type.  Note that while cancel requests use the same request
 * format, they require a different way of handling their replies and so do
 * not go through this function.
 */
static int
sdev_simple(dev_t dev, int type, int param)
{
	struct smap *drv;
	sockid_t sid;
	message msg;
	int rc;

	assert(type == SDEV_LISTEN || type == SDEV_SHUTDOWN ||
	    type == SDEV_CLOSE);

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = type;
	msg.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_simple.sock_id = sid;
	msg.m_vfs_lsockdriver_simple.param = param;

	/* Perform the request, keeping this thread blocked meanwhile. */
	rc = sdev_sendrec(drv, &msg);
	if (rc != OK)
		return rc;	/* socket driver died */

	/* The expected reply simply carries the call's status. */
	if (msg.m_type == SDEV_REPLY)
		return msg.m_lsockdriver_vfs_reply.status;

	printf("VFS: %d sent bad reply type %d for call %d\n",
	    drv->smap_endpt, msg.m_type, job_call_nr);
	return EIO;
}
/*
 * Put a socket in listening mode, with the given (non-negative) backlog.
 * Return the result of the underlying SDEV_LISTEN request.
 */
int
sdev_listen(dev_t dev, int backlog)
{
	int status;

	assert(backlog >= 0);

	status = sdev_simple(dev, SDEV_LISTEN, backlog);

	return status;
}
/*
 * Accept a new connection on a socket.  If 'addr' is nonzero, the socket
 * driver is granted write access to 'addr_len' bytes at that address in the
 * calling process.  The request is sent and the calling process is suspended
 * until the reply arrives; 'listen_fd' is saved along with the suspension
 * state.  Return EIO if 'dev' does not map to a socket driver, or the result
 * of sdev_suspend() otherwise.
 */
int
sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags,
	int listen_fd)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t addr_grant = GRANT_INVALID;
	message msg;
	int rc;

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* A grant is needed only if the caller supplied an address buffer. */
	if (addr != 0) {
		addr_grant = cpf_grant_magic(drv->smap_endpt, who_e, addr,
		    addr_len, CPF_WRITE);
		if (!GRANT_VALID(addr_grant))
			panic("VFS: cpf_grant_magic failed");
	}

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = SDEV_ACCEPT;
	msg.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_addr.sock_id = sid;
	msg.m_vfs_lsockdriver_addr.grant = addr_grant;
	msg.m_vfs_lsockdriver_addr.len = addr_len;
	msg.m_vfs_lsockdriver_addr.user_endpt = who_e;
	if (filp_flags & O_NONBLOCK)
		msg.m_vfs_lsockdriver_addr.sflags = SDEV_NONBLOCK;
	else
		msg.m_vfs_lsockdriver_addr.sflags = 0;

	/* Send the request; the reply is picked up asynchronously. */
	rc = asynsend3(drv->smap_endpt, &msg, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_accept failed: %d", rc);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, addr_grant, GRANT_INVALID, GRANT_INVALID,
	    listen_fd, 0);
}
/*
 * Send or receive a message on a socket.  All read (read(2), recvfrom(2), and
 * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on
 * sockets pass through this function.  The function is named sdev_readwrite
 * rather than sdev_sendrecv to avoid confusion with sdev_sendrec.
 *
 * Each of the data, control, and address buffers is optional; a buffer
 * address of zero means that no grant is created for it.  'rw_flag' selects
 * the direction (WRITING for sending), 'flags' holds the MSG_ flags of the
 * call, and 'user_buf' is saved with the suspension state.  Return EIO if
 * 'dev' does not map to a socket driver, or the result of sdev_suspend()
 * otherwise.
 */
int
sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len,
	vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf,
	unsigned int addr_len, int flags, int rw_flag, int filp_flags,
	vir_bytes user_buf)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t dgrant = GRANT_INVALID;
	cp_grant_id_t cgrant = GRANT_INVALID;
	cp_grant_id_t agrant = GRANT_INVALID;
	message msg;
	int rc, access, sflags;
	int writing = (rw_flag == WRITING);

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* When sending, the driver reads our buffers; otherwise it writes. */
	access = writing ? CPF_READ : CPF_WRITE;

	/*
	 * Supposedly it is allowed to send or receive zero data bytes, even
	 * though it is a bad idea as the return value will then be zero, which
	 * may also indicate EOF (as per W. Richard Stevens).
	 */
	if (data_buf != 0) {
		dgrant = cpf_grant_magic(drv->smap_endpt, who_e, data_buf,
		    data_len, access);
		if (!GRANT_VALID(dgrant))
			panic("VFS: cpf_grant_magic failed");
	}

	if (ctl_buf != 0) {
		cgrant = cpf_grant_magic(drv->smap_endpt, who_e, ctl_buf,
		    ctl_len, access);
		if (!GRANT_VALID(cgrant))
			panic("VFS: cpf_grant_magic failed");
	}

	if (addr_buf != 0) {
		agrant = cpf_grant_magic(drv->smap_endpt, who_e, addr_buf,
		    addr_len, access);
		if (!GRANT_VALID(agrant))
			panic("VFS: cpf_grant_magic failed");
	}

	/* Fold the relevant file status flags into the per-call flags. */
	sflags = flags;
	if (filp_flags & O_NONBLOCK)
		sflags |= MSG_DONTWAIT;
	if (writing && (filp_flags & O_NOSIGPIPE))
		sflags |= MSG_NOSIGNAL;

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = writing ? SDEV_SEND : SDEV_RECV;
	msg.m_vfs_lsockdriver_sendrecv.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_sendrecv.sock_id = sid;
	msg.m_vfs_lsockdriver_sendrecv.data_grant = dgrant;
	msg.m_vfs_lsockdriver_sendrecv.data_len = data_len;
	msg.m_vfs_lsockdriver_sendrecv.ctl_grant = cgrant;
	msg.m_vfs_lsockdriver_sendrecv.ctl_len = ctl_len;
	msg.m_vfs_lsockdriver_sendrecv.addr_grant = agrant;
	msg.m_vfs_lsockdriver_sendrecv.addr_len = addr_len;
	msg.m_vfs_lsockdriver_sendrecv.user_endpt = who_e;
	msg.m_vfs_lsockdriver_sendrecv.flags = sflags;

	/* Send the request; the reply is picked up asynchronously. */
	rc = asynsend3(drv->smap_endpt, &msg, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_readwrite failed: %d", rc);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, dgrant, cgrant, agrant, -1, user_buf);
}
/*
 * Perform I/O control on a socket.  A grant for the caller's buffer is
 * obtained through make_ioctl_grant(), the request is sent to the socket
 * driver, and the calling process is suspended until the reply arrives.
 * Return EIO if 'dev' does not map to a socket driver, or the result of
 * sdev_suspend() otherwise.
 */
int
sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf, int filp_flags)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t io_grant;
	message msg;
	int rc;

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* Allocate the grant that goes with this ioctl request. */
	io_grant = make_ioctl_grant(drv->smap_endpt, who_e, buf, request);

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = SDEV_IOCTL;
	msg.m_vfs_lsockdriver_ioctl.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_ioctl.sock_id = sid;
	msg.m_vfs_lsockdriver_ioctl.request = request;
	msg.m_vfs_lsockdriver_ioctl.grant = io_grant;
	msg.m_vfs_lsockdriver_ioctl.user_endpt = who_e;
	if (filp_flags & O_NONBLOCK)
		msg.m_vfs_lsockdriver_ioctl.sflags = SDEV_NONBLOCK;
	else
		msg.m_vfs_lsockdriver_ioctl.sflags = 0;

	/* Send the request; the reply is picked up asynchronously. */
	rc = asynsend3(drv->smap_endpt, &msg, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_ioctl failed: %d", rc);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, io_grant, GRANT_INVALID, GRANT_INVALID,
	    -1, 0);
}
/*
 * Set a socket option.  The option value of 'len' bytes at 'addr' in the
 * calling process is made readable to the socket driver for the duration of
 * the request.  The calling thread blocks until the reply arrives.  Return
 * the status from the driver's reply, the error from sdev_sendrec() if the
 * socket driver died, or EIO if the device is unknown or the reply has an
 * unexpected type.
 */
int
sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr,
	unsigned int len)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t val_grant;
	message msg;
	int rc;

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* Let the driver read the option value from the caller's memory. */
	val_grant = cpf_grant_magic(drv->smap_endpt, who_e, addr, len,
	    CPF_READ);
	if (!GRANT_VALID(val_grant))
		panic("VFS: cpf_grant_magic failed");

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = SDEV_SETSOCKOPT;
	msg.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_getset.sock_id = sid;
	msg.m_vfs_lsockdriver_getset.level = level;
	msg.m_vfs_lsockdriver_getset.name = name;
	msg.m_vfs_lsockdriver_getset.grant = val_grant;
	msg.m_vfs_lsockdriver_getset.len = len;

	/* Perform the request, keeping this thread blocked meanwhile. */
	rc = sdev_sendrec(drv, &msg);

	/* The grant is no longer needed, whatever the outcome. */
	(void)cpf_revoke(val_grant);

	if (rc != OK)
		return rc;	/* socket driver died */

	/* The expected reply simply carries the call's status. */
	if (msg.m_type == SDEV_REPLY)
		return msg.m_lsockdriver_vfs_reply.status;

	printf("VFS: %d sent bad reply type %d for call %d\n",
	    drv->smap_endpt, msg.m_type, job_call_nr);
	return EIO;
}
/*
 * Send and receive a "get" request: getsockopt, getsockname, or getpeername.
 * On entry, '*len' is the size of the caller's buffer at 'addr', to which the
 * socket driver is given write access.  On success, return OK and replace
 * '*len' with the non-negative status value from the driver's reply.
 * Otherwise return the negative status from the reply, the error from
 * sdev_sendrec() if the socket driver died, or EIO if the device is unknown
 * or the reply has an unexpected type; '*len' is left untouched then.
 */
static int
sdev_get(dev_t dev, int type, int level, int name, vir_bytes addr,
	unsigned int * len)
{
	struct smap *drv;
	sockid_t sid;
	cp_grant_id_t out_grant;
	message msg;
	unsigned int buf_len;
	int rc;

	assert(type == SDEV_GETSOCKOPT || type == SDEV_GETSOCKNAME ||
	    type == SDEV_GETPEERNAME);

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	buf_len = *len;

	/* Let the driver write the result into the caller's memory. */
	out_grant = cpf_grant_magic(drv->smap_endpt, who_e, addr, buf_len,
	    CPF_WRITE);
	if (!GRANT_VALID(out_grant))
		panic("VFS: cpf_grant_magic failed");

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = type;
	msg.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
	msg.m_vfs_lsockdriver_getset.sock_id = sid;
	msg.m_vfs_lsockdriver_getset.level = level;
	msg.m_vfs_lsockdriver_getset.name = name;
	msg.m_vfs_lsockdriver_getset.grant = out_grant;
	msg.m_vfs_lsockdriver_getset.len = buf_len;

	/* Perform the request, keeping this thread blocked meanwhile. */
	rc = sdev_sendrec(drv, &msg);

	/* The grant is no longer needed, whatever the outcome. */
	(void)cpf_revoke(out_grant);

	if (rc != OK)
		return rc;	/* socket driver died */

	if (msg.m_type != SDEV_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    drv->smap_endpt, msg.m_type, job_call_nr);
		return EIO;
	}

	/* A non-negative status is the resulting length for the caller. */
	rc = msg.m_lsockdriver_vfs_reply.status;
	if (rc >= 0) {
		*len = (unsigned int)rc;
		return OK;
	}

	return rc;
}
/*
 * Get a socket option.  This is sdev_get() with the SDEV_GETSOCKOPT request
 * type; see there for the meaning of 'len' and the return value.
 */
int
sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr,
	unsigned int * len)
{
	const int req_type = SDEV_GETSOCKOPT;

	return sdev_get(dev, req_type, level, name, addr, len);
}
/*
 * Get the local address of a socket.  This is sdev_get() with the
 * SDEV_GETSOCKNAME request type; the level and name fields are passed as
 * zero.
 */
int
sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int * addr_len)
{
	const int req_type = SDEV_GETSOCKNAME;

	return sdev_get(dev, req_type, 0, 0, addr, addr_len);
}
/*
 * Get the remote address of a socket.  This is sdev_get() with the
 * SDEV_GETPEERNAME request type; the level and name fields are passed as
 * zero.
 */
int
sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int * addr_len)
{
	const int req_type = SDEV_GETPEERNAME;

	return sdev_get(dev, req_type, 0, 0, addr, addr_len);
}
/*
 * Shut down socket send and/or receive operations.  'how' must be one of
 * SHUT_RD, SHUT_WR, and SHUT_RDWR.  Return the result of the underlying
 * SDEV_SHUTDOWN request.
 */
int
sdev_shutdown(dev_t dev, int how)
{
	int status;

	assert(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR);

	status = sdev_simple(dev, SDEV_SHUTDOWN, how);

	return status;
}
/*
 * Close the socket identified by the given socket device number.
 *
 * If 'may_suspend' is set, the close request is sent to the socket driver and
 * the calling process is suspended until the reply arrives; the function then
 * returns the result of sdev_suspend(), or EIO if 'dev' does not map to a
 * socket driver.  If 'may_suspend' is clear, the calling thread is blocked
 * until the socket is closed, and the result of that synchronous request is
 * returned.
 */
int
sdev_close(dev_t dev, int may_suspend)
{
	struct smap *sp;
	sockid_t sock_id;
	message m;
	int r;

	/*
	 * Originally, all close requests were blocking the calling thread, but
	 * the new support for SO_LINGER has changed that.  In a very strictly
	 * limited subset of cases - namely, the user process calling close(2),
	 * we suspend the close request and handle it asynchronously.  In all
	 * other cases, including close-on-exit, close-on-exec, and even dup2,
	 * the close is issued as a thread-synchronous request instead.
	 */
	if (!may_suspend) {
		/*
		 * Block the calling thread until the socket is closed.  The
		 * request is issued with SDEV_NONBLOCK as its parameter.
		 */
		return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK);
	}

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = SDEV_CLOSE;
	m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_simple.sock_id = sock_id;
	m.m_vfs_lsockdriver_simple.param = 0;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_close failed: %d", r);

	/* Suspend the process until the reply arrives.  No grants are used. */
	return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID, GRANT_INVALID,
	    -1, 0);
}
/*
 * Initiate a select call on a socket device, for the operations in 'ops'.
 * The request is only sent here; neither the calling thread nor the calling
 * process is made to wait for a reply.  Return OK iff the request was sent,
 * or EIO if 'dev' does not map to a socket driver.
 */
int
sdev_select(dev_t dev, int ops)
{
	struct smap *drv;
	sockid_t sid;
	message msg;
	int rc;

	drv = get_smap_by_dev(dev, &sid);
	if (drv == NULL)
		return EIO;

	/* Build the request message. */
	memset(&msg, 0, sizeof(msg));
	msg.m_type = SDEV_SELECT;
	msg.m_vfs_lsockdriver_select.sock_id = sid;
	msg.m_vfs_lsockdriver_select.ops = ops;

	/* Send the request to the driver; failing to send at all is fatal. */
	rc = asynsend3(drv->smap_endpt, &msg, AMF_NOREPLY);
	if (rc != OK)
		panic("VFS: asynsend in sdev_select failed: %d", rc);

	return OK;
}
/*
 * A reply has arrived for a previous socket accept request, and the reply
 * indicates that a socket has been accepted (its socket ID is non-negative).
 * The reply also carries a status; usually this status is OK, but if not, the
 * newly accepted socket must be closed immediately again.  Take care of the
 * low-level aspects of the reply here - revoking the grant and constructing
 * the new socket device number - and leave the rest to the upper socket layer
 * through resume_accept().  This function is always called from a worker
 * thread, and may thus block.
 */
static void
sdev_finish_accept(struct fproc * rfp, message * m_ptr)
{
	struct smap *drv;
	sockid_t new_id;
	dev_t new_dev;
	unsigned int addr_bytes;
	int result;

	assert(rfp->fp_sdev.callnr == VFS_ACCEPT);
	assert(m_ptr->m_type == SDEV_ACCEPT_REPLY);
	assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id >= 0);

	/* Free resources.  Accept requests use up to one grant. */
	if (GRANT_VALID(rfp->fp_sdev.grant[0]))
		cpf_revoke(rfp->fp_sdev.grant[0]);
	assert(!GRANT_VALID(rfp->fp_sdev.grant[1]));
	assert(!GRANT_VALID(rfp->fp_sdev.grant[2]));

	/* Extract the reply fields. */
	new_id = m_ptr->m_lsockdriver_vfs_accept_reply.sock_id;
	result = m_ptr->m_lsockdriver_vfs_accept_reply.status;
	addr_bytes = m_ptr->m_lsockdriver_vfs_accept_reply.len;

	/*
	 * We do not want the upper socket layer (socket.c) to deal with smap
	 * and socket ID details, so we construct the new socket device number
	 * here.  We won't use the saved listen FD to determine the smap entry
	 * here, since that involves file pointers and other upper-layer-only
	 * stuff.  So we have to look it up by the source endpoint.  As a
	 * result, we detect some driver deaths here (but not all: see below).
	 */
	drv = get_smap_by_endpt(m_ptr->m_source);
	if (drv == NULL) {
		/*
		 * The driver must have died while the thread was blocked on
		 * activation.  Extremely rare, but theoretically possible.
		 * Some driver deaths are indicated only by a driver-up
		 * announcement though; resume_accept() will detect this by
		 * checking that the listening socket has not been invalidated.
		 */
		result = EIO;
		new_dev = NO_DEV;
	} else {
		/* Leave 'result' as is, regardless of whether it is OK. */
		new_dev = make_smap_dev(drv, new_id);
	}

	/* Let the upper socket layer handle the rest. */
	resume_accept(rfp, result, new_dev, addr_bytes, rfp->fp_sdev.aux.fd);
}
/*
* Worker thread stub for finishing successful accept requests.
*/
static void
do_accept_reply(void)
{
sdev_finish_accept(fp, &job_m_in);
}
/*
 * Finish a long-lasting socket call.  With the exception of successful accept
 * requests, this function is called whenever a reply is received for a socket
 * driver request for which the corresponding user process was suspended (as
 * opposed to requests which just suspend the worker thread).  It is also
 * called if the socket driver has died during such a call, in which case the
 * given message's m_type is a negative error code rather than a reply type.
 *
 * The division of labor between the upper socket layer (socket.c) and the
 * lower socket layer (this file) is roughly as follows: if resuming the
 * system call takes no more than a simple replycode() call, it is done here;
 * otherwise, the upper socket layer is called to handle the details.  In any
 * case, the upper socket layer never gets to deal with reply message parsing
 * or suspension state.
 *
 * This function may or may not be called from a worker thread; as such, it
 * MUST NOT block its calling thread.  It is called for failed accept
 * requests; successful accept requests have their replies routed through
 * sdev_finish_accept() instead, because those require a worker thread.
 */
static void
sdev_finish(struct fproc * rfp, message * m_ptr)
{
	unsigned int ctl_bytes, addr_bytes;
	unsigned int i;
	int callnr, mtype, result, rflags;

	/* The suspension status must just have been cleared by the caller. */
	assert(rfp->fp_blocked_on == FP_BLOCKED_ON_NONE);

	/*
	 * Free resources.  Every suspending call sets all grant fields, so we
	 * can safely revoke all of them without testing the original call.
	 */
	for (i = 0; i < 3; i++) {
		if (GRANT_VALID(rfp->fp_sdev.grant[i]))
			cpf_revoke(rfp->fp_sdev.grant[i]);
	}

	/*
	 * Now that the socket driver call has finished (or been stopped due to
	 * driver death), we need to finish the corresponding system call from
	 * the user process.  The action to take depends on the system call.
	 */
	callnr = rfp->fp_sdev.callnr;
	mtype = m_ptr->m_type;

	switch (callnr) {
	case VFS_BIND:
	case VFS_CONNECT:
	case VFS_WRITE:
	case VFS_SENDTO:
	case VFS_SENDMSG:
	case VFS_IOCTL:
	case VFS_CLOSE:
		/*
		 * These calls all use the same SDEV_REPLY reply type and only
		 * need to reply an OK-or-error status code back to userland.
		 */
		if (mtype == SDEV_REPLY) {
			result = m_ptr->m_lsockdriver_vfs_reply.status;

			/*
			 * For close(2) calls, the return value must indicate
			 * that the file descriptor has been closed.
			 */
			if (callnr == VFS_CLOSE &&
			    !(result == OK || result == EINPROGRESS))
				result = OK;
		} else if (mtype < 0) {
			/* Not a reply at all, but an error code. */
			result = mtype;
		} else {
			printf("VFS: %d sent bad reply type %d for call %d\n",
			    m_ptr->m_source, mtype, callnr);
			result = EIO;
		}

		replycode(rfp->fp_endpoint, result);

		break;

	case VFS_READ:
	case VFS_RECVFROM:
	case VFS_RECVMSG:
		/*
		 * These calls use SDEV_RECV_REPLY.  The action to take depends
		 * on the exact call.
		 */
		ctl_bytes = 0;
		addr_bytes = 0;
		rflags = 0;

		if (mtype == SDEV_RECV_REPLY) {
			result = m_ptr->m_lsockdriver_vfs_recv_reply.status;
			ctl_bytes =
			    m_ptr->m_lsockdriver_vfs_recv_reply.ctl_len;
			addr_bytes =
			    m_ptr->m_lsockdriver_vfs_recv_reply.addr_len;
			rflags = m_ptr->m_lsockdriver_vfs_recv_reply.flags;
		} else if (mtype < 0) {
			/* Not a reply at all, but an error code. */
			result = mtype;
		} else {
			printf("VFS: %d sent bad reply type %d for call %d\n",
			    m_ptr->m_source, mtype, callnr);
			result = EIO;
		}

		/* Plain reads need a result code only; the others need more. */
		if (callnr == VFS_READ)
			replycode(rfp->fp_endpoint, result);
		else if (callnr == VFS_RECVFROM)
			resume_recvfrom(rfp, result, addr_bytes);
		else if (callnr == VFS_RECVMSG)
			resume_recvmsg(rfp, result, ctl_bytes, addr_bytes,
			    rflags, rfp->fp_sdev.aux.buf);

		break;

	case VFS_ACCEPT:
		/*
		 * This call uses SDEV_ACCEPT_REPLY.  We only get here if the
		 * accept call has failed without creating a new socket, in
		 * which case we can simply call replycode() with the error.
		 * For nothing other than consistency, we let resume_accept()
		 * handle this case too.
		 */
		addr_bytes = 0;

		if (mtype == SDEV_ACCEPT_REPLY) {
			assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id <
			    0);
			result = m_ptr->m_lsockdriver_vfs_accept_reply.status;
			addr_bytes = m_ptr->m_lsockdriver_vfs_accept_reply.len;
		} else if (mtype < 0) {
			/* Not a reply at all, but an error code. */
			result = mtype;
		} else {
			printf("VFS: %d sent bad reply type %d for call %d\n",
			    m_ptr->m_source, mtype, callnr);
			result = EIO;
		}

		/*
		 * Quick rundown of m_lsockdriver_vfs_accept_reply cases:
		 *
		 * - sock_id >= 0, status == OK: new socket accepted
		 * - sock_id >= 0, status != OK: new socket must be closed
		 * - sock_id < 0, status != OK: failure accepting socket
		 * - sock_id < 0, status == OK: invalid, covered right here
		 *
		 * See libsockdriver for why there are two reply fields at all.
		 */
		if (result >= 0) {
			printf("VFS: %d sent bad status %d for call %d\n",
			    m_ptr->m_source, result, callnr);
			result = EIO;
		}

		resume_accept(rfp, result, NO_DEV, addr_bytes,
		    rfp->fp_sdev.aux.fd);

		break;

	default:
		/*
		 * Ultimately, enumerating all system calls that may cause
		 * socket I/O may prove too cumbersome.  In that case, the
		 * callnr field could be replaced by a field that stores the
		 * combination of the expected reply type and the action to
		 * take, for example.
		 */
		panic("VFS: socket reply %d for unknown call %d from %d",
		    mtype, callnr, rfp->fp_endpoint);
	}
}
/*
 * Abort the suspended socket call for the given process, because the
 * corresponding socket driver has died.  The process must be blocked on a
 * socket driver; it is unblocked here and its call is then finished with an
 * error.
 */
void
sdev_stop(struct fproc * rfp)
{
	message abort_msg;

	assert(rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV);

	/* Clear the suspension status, as sdev_finish() expects. */
	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;

	/*
	 * We use one single approach both here and when stopping worker
	 * threads: the reply message's m_type is set to an error code (always
	 * EIO for now) instead of an actual SDEV_ reply code.  We test for
	 * this case in non-suspending calls as well as in sdev_finish().
	 * Only the m_type field of the message is filled in here.
	 */
	abort_msg.m_type = EIO;

	sdev_finish(rfp, &abort_msg);
}
/*
* Cancel the ongoing long-lasting socket call, because the calling process has
* received a caught or terminating signal. This function is always called
* from a worker thread (as part of PM) work, with 'fp' set to the process that
* issued the original system call. The calling function has just unsuspended
* the process out of _SDEV blocking state. The job of this function is to
* issue a cancel request and then block until a reply comes in; the reply may
* indicate success, in which case it must be handled accordingly.
*/
void
sdev_cancel(void)
{
struct smap *sp;
message m;
sockid_t sock_id;
/* The suspension status must just have been cleared by the caller. */
assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
if ((sp = get_smap_by_dev(fp->fp_sdev.dev, &sock_id)) != NULL) {
/* Prepare the request message. */
memset(&m, 0, sizeof(m));
m.m_type = SDEV_CANCEL;
m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
m.m_vfs_lsockdriver_simple.sock_id = sock_id;
/*
* Send the cancel request, and wait for a reply. The reply
* will be for the original request and must be processed
* accordingly. It is possible that the original request
* actually succeeded, because 1) the cancel request resulted
* in partial success or 2) the original reply and the cancel
* request crossed each other. It is because of the second
* case that a socket driver must not respond at all to a
* cancel operation for an unknown request.
*/
sdev_sendrec(sp, &m);
} else
m.m_type = EIO;
/*
* Successful accept requests require special processing, but since we
* are already operating from a working thread here, we need not spawn
* an additional worker thread for this case.
*/
if (m.m_type == SDEV_ACCEPT_REPLY &&
m.m_lsockdriver_vfs_accept_reply.sock_id >= 0)
sdev_finish_accept(fp, &m);
else
sdev_finish(fp, &m);
}
/*
 * A socket driver has sent a reply to a socket request. Process it, by either
 * waking up an active worker thread, finishing the system call from here, or
 * (in the exceptional case of accept calls) spawning a new worker thread to
 * process the reply. This function MUST NOT block its calling thread.
 *
 * The reply is taken from the global m_in message; who_e is used as the
 * endpoint of the replying driver and call_nr as the reply message type.
 * Replies that cannot be matched to a known driver, a known reply type, a
 * valid process, or a call suspended on this driver are logged and ignored.
 */
void
sdev_reply(void)
{
struct fproc *rfp;
struct smap *sp;
struct worker_thread *wp;
sockid_t req_id = -1;
dev_t dev;
int slot;
/* Only replies from an endpoint with a socket driver mapping are accepted. */
if ((sp = get_smap_by_endpt(who_e)) == NULL) {
printf("VFS: ignoring sock dev reply from unknown driver %d\n",
who_e);
return;
}
/*
 * Obtain the request ID from the reply; the message layout, and thus the
 * location of the ID, depends on the reply type. Select replies are not
 * matched to a request here: they are passed on to the select code together
 * with the device number of the socket, after which we are done.
 */
switch (call_nr) {
case SDEV_REPLY:
req_id = m_in.m_lsockdriver_vfs_reply.req_id;
break;
case SDEV_SOCKET_REPLY:
req_id = m_in.m_lsockdriver_vfs_socket_reply.req_id;
break;
case SDEV_ACCEPT_REPLY:
req_id = m_in.m_lsockdriver_vfs_accept_reply.req_id;
break;
case SDEV_RECV_REPLY:
req_id = m_in.m_lsockdriver_vfs_recv_reply.req_id;
break;
case SDEV_SELECT1_REPLY:
dev = make_smap_dev(sp,
m_in.m_lsockdriver_vfs_select_reply.sock_id);
select_sdev_reply1(dev,
m_in.m_lsockdriver_vfs_select_reply.status);
return;
case SDEV_SELECT2_REPLY:
dev = make_smap_dev(sp,
m_in.m_lsockdriver_vfs_select_reply.sock_id);
select_sdev_reply2(dev,
m_in.m_lsockdriver_vfs_select_reply.status);
return;
default:
printf("VFS: ignoring unknown sock dev reply %d from %d\n",
call_nr, who_e);
return;
}
/*
 * The request ID is interpreted as a process endpoint. Translate it to a
 * process slot, ignoring the reply if the endpoint is not valid.
 */
if (isokendpt((endpoint_t)req_id, &slot) != OK) {
printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
who_e, req_id);
return;
}
rfp = &fproc[slot];
wp = rfp->fp_worker;
/*
 * Case 1: the process has a worker thread that is waiting for a reply from
 * this very driver. Copy the reply into the buffer that the thread provided,
 * mark the exchange as completed, and wake up the thread.
 */
if (wp != NULL && wp->w_task == who_e && wp->w_drv_sendrec != NULL) {
assert(!fp_is_blocked(rfp));
*wp->w_drv_sendrec = m_in;
wp->w_drv_sendrec = NULL;
worker_signal(wp); /* resume suspended thread */
/*
 * It is up to the worker thread to 1) check that the reply is
 * of the right type for the request, and 2) keep in mind that
 * the reply type may be EIO in case the socket driver died.
 */
/*
 * Case 2: the process is not suspended on a socket call, or it is suspended
 * on a socket that maps to a different driver entry. Drop the reply.
 */
} else if (rfp->fp_blocked_on != FP_BLOCKED_ON_SDEV ||
get_smap_by_dev(rfp->fp_sdev.dev, NULL) != sp) {
printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
rfp->fp_endpoint, who_e);
return;
/*
 * Case 3: the process is suspended on this driver, and the reply is an accept
 * reply that carries a new socket ID.
 */
} else if (call_nr == SDEV_ACCEPT_REPLY &&
m_in.m_lsockdriver_vfs_accept_reply.sock_id >= 0) {
/*
 * For accept replies that return a new socket, we need to
 * spawn a worker thread, because accept calls may block (so
 * there will no longer be a worker thread) and processing the
 * reply requires additional blocking calls (which we cannot
 * issue from the main thread). This is tricky. Under no
 * circumstances may we "lose" a legitimate reply, because this
 * would lead to resource leaks in the socket driver. To this
 * end, we rely on the current worker thread model to
 * prioritize regular work over PM work. Still, sdev_cancel()
 * may end up receiving the accept reply if it was already
 * blocked waiting for the reply message, and it must then
 * perform the same tasks.
 */
/*
 * It is possible that if all threads are in use, there is a
 * "gap" between starting the thread and its activation. The
 * main problem for this case is that the socket driver dies
 * within that gap. For accepts, we address this with no less
 * than two checks: 1) in this file, by looking up the smap
 * entry by the reply source endpoint again - if the entry is
 * no longer valid, the socket driver must have died; 2) in
 * socket.c, by revalidating the original listening socket - if
 * the listening socket has been invalidated, the driver died.
 *
 * Since we unsuspend the process now, a socket driver sending
 * two accept replies in a row may never cause VFS to attempt
 * spawning two threads; the second reply should be ignored.
 */
/*
 * NOTE(review): this assertion inspects 'fp', whereas the worker thread
 * below is started for 'rfp'. Confirm whether 'fp' is intended here.
 */
assert(fp->fp_func == NULL);
/* Start a worker thread that processes the reply through do_accept_reply. */
worker_start(rfp, do_accept_reply, &m_in, FALSE /*use_spare*/);
/*
 * TODO: I just introduced the notion of not using the fp_u
 * union across yields after unsuspension, but for socket calls
 * we have a lot of socket state to carry over, so I'm now
 * immediately violating my own rule again here. Possible
 * solutions: 1) introduce another blocking state just to mark
 * the fp_u union in use (this has side effects though), 2)
 * introduce a pseudo message type which covers both the accept
 * reply fields and the fp_u state (do_pending_pipe does this),
 * or 3) add a fp_flags flag for this purpose. In any case,
 * the whole point is that we catch any attempts to reuse fp_u
 * for other purposes and thus cause state corruption. This
 * should not happen anyway, but it's too dangerous to leave
 * entirely unchecked. --dcvmoole
 */
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
/*
 * Case 4: any other reply for a process suspended on this driver. Unsuspend
 * the process and finish its system call right here.
 */
} else {
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
sdev_finish(rfp, &m_in);
}
}
|