/* This file contains the procedures that manipulate file descriptors.
*
* The entry points into this file are
 *   get_fd:          look for a free file descriptor and a free filp slot
 *   get_filp:        look up the filp entry for a given file descriptor
 *   find_filp:       find a filp slot that points to a given vnode
 *   invalidate_filp: invalidate a filp and associated fd's; only let close()
 *                    happen on it
 *   do_copyfd:       copy a file descriptor from or to another endpoint
*/
#include <sys/select.h>
#include <minix/callnr.h>
#include <minix/u64.h>
#include <assert.h>
#include <sys/stat.h>
#include "fs.h"
#include "file.h"
#include "vnode.h"
#if LOCK_DEBUG
/*===========================================================================*
 *                         check_filp_locks_by_me                            *
*===========================================================================*/
void check_filp_locks_by_me(void)
{
/* Check whether this thread still has filp locks held */
struct filp *f;
int r;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
r = mutex_trylock(&f->filp_lock);
if (r == -EDEADLK)
panic("Thread %d still holds filp lock on filp %p call_nr=%d\n",
mthread_self(), f, job_call_nr);
else if (r == 0) {
/* We just obtained the lock, release it */
mutex_unlock(&f->filp_lock);
}
}
}
#endif
/*===========================================================================*
* check_filp_locks *
*===========================================================================*/
void check_filp_locks(void)
{
struct filp *f;
int r, count = 0;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
r = mutex_trylock(&f->filp_lock);
if (r == -EBUSY) {
/* Mutex is still locked */
count++;
} else if (r == 0) {
/* We just obtained a lock, don't want it */
mutex_unlock(&f->filp_lock);
} else
panic("filp_lock weird state");
}
if (count) panic("locked filps");
#if 0
else printf("check_filp_locks OK\n");
#endif
}
/*===========================================================================*
* init_filps *
*===========================================================================*/
void init_filps(void)
{
/* Initialize filps */
struct filp *f;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (mutex_init(&f->filp_lock, NULL) != 0)
panic("Failed to initialize filp mutex");
}
}
/*===========================================================================*
* check_fds *
*===========================================================================*/
int check_fds(struct fproc *rfp, int nfds)
{
/* Check whether at least 'nfds' file descriptors can be created in the process
* 'rfp'. Return OK on success, or otherwise an appropriate error code.
*/
int i;
assert(nfds >= 1);
for (i = 0; i < OPEN_MAX; i++) {
if (rfp->fp_filp[i] == NULL) {
if (--nfds == 0)
return OK;
}
}
return EMFILE;
}
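/* Usage sketch (not from the original source): a caller that must create
 * several descriptors atomically, such as a pipe(2) implementation that
 * needs two, can verify capacity up front before allocating anything:
 *
 *     if ((r = check_fds(fp, 2)) != OK)
 *             return(r);
 */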
/*===========================================================================*
* get_fd *
*===========================================================================*/
int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, struct filp **fpt)
{
/* Look for a free file descriptor and a free filp slot. Fill in the mode word
* in the latter, but don't claim either one yet, since the open() or creat()
* may yet fail.
*/
register struct filp *f;
register int i;
/* Search the fproc fp_filp table for a free file descriptor. */
for (i = start; i < OPEN_MAX; i++) {
if (rfp->fp_filp[i] == NULL) {
/* A file descriptor has been located. */
*k = i;
break;
}
}
/* Check to see if a file descriptor has been found. */
if (i >= OPEN_MAX) return(EMFILE);
/* If we don't care about a filp, return now */
if (fpt == NULL) return(OK);
/* Now that a file descriptor has been found, look for a free filp slot. */
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
assert(f->filp_count >= 0);
if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) {
f->filp_mode = bits;
f->filp_pos = 0;
f->filp_selectors = 0;
f->filp_select_ops = 0;
f->filp_pipe_select_ops = 0;
f->filp_select_dev = NO_DEV;
f->filp_flags = 0;
f->filp_select_flags = 0;
f->filp_softlock = NULL;
f->filp_ioctl_fp = NULL;
*fpt = f;
return(OK);
}
}
/* If control passes here, the filp table must be full. Report that back. */
return(ENFILE);
}
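/* Usage sketch (not from the original source): an open()-style caller would
 * typically reserve a descriptor/filp pair first, and commit both slots only
 * once the rest of the operation has succeeded:
 *
 *     int r, fd;
 *     struct filp *f;
 *     if ((r = get_fd(fp, 0, bits, &fd, &f)) != OK)
 *             return(r);
 *     ... resolve the vnode 'vp'; on failure the reservation can simply be
 *         abandoned, since neither slot has been claimed yet ...
 *     f->filp_vno = vp;
 *     f->filp_count = 1;           (claim the filp slot)
 *     fp->fp_filp[fd] = f;         (claim the descriptor slot)
 *
 * Note that on success get_fd() returns with the filp mutex held, so the
 * caller must eventually release it, e.g. through unlock_filp().
 */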
/*===========================================================================*
* get_filp *
*===========================================================================*/
struct filp *
get_filp(
int fild, /* file descriptor */
tll_access_t locktype
)
{
/* See if 'fild' refers to a valid file descriptor. If so, return its filp
 * pointer.
 */
return get_filp2(fp, fild, locktype);
}
/*===========================================================================*
* get_filp2 *
*===========================================================================*/
struct filp *
get_filp2(
register struct fproc *rfp,
int fild, /* file descriptor */
tll_access_t locktype
)
{
/* See if 'fild' refers to a valid file descriptor. If so, return its filp
 * pointer.
 */
struct filp *filp;
filp = NULL;
if (fild < 0 || fild >= OPEN_MAX)
err_code = EBADF;
else if (locktype != VNODE_OPCL && rfp->fp_filp[fild] != NULL &&
rfp->fp_filp[fild]->filp_mode == FILP_CLOSED)
err_code = EIO; /* disallow all use except close(2) */
else if ((filp = rfp->fp_filp[fild]) == NULL)
err_code = EBADF;
else if (locktype != VNODE_NONE) /* Only lock the filp if requested */
lock_filp(filp, locktype); /* All is fine */
return(filp); /* may also be NULL */
}
/*===========================================================================*
* find_filp *
*===========================================================================*/
struct filp *find_filp(struct vnode *vp, mode_t bits)
{
/* Find a filp slot that refers to the vnode 'vp' in a way described by the
 * mode bits in 'bits'. Used for determining whether somebody is still
 * interested in either end of a pipe. Also used when opening a FIFO, to
 * find partners to share a filp field with (to share the file position).
 * Like 'get_fd' it performs its job by linear search through the filp table.
*/
struct filp *f;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) {
return(f);
}
}
/* If control passes here, the filp wasn't there. Report that back. */
return(NULL);
}
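/* Usage sketch (not from the original source): pipe code can use this to
 * check whether any reader is still attached to a pipe vnode before letting
 * a write proceed:
 *
 *     if (find_filp(vp, R_BIT) == NULL) {
 *             ... no readers are left; writing would raise SIGPIPE ...
 *     }
 */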
/*===========================================================================*
* find_filp_by_sock_dev *
*===========================================================================*/
struct filp *find_filp_by_sock_dev(dev_t dev)
{
/* See if there is a file pointer for a socket with the given socket device
* number.
*/
struct filp *f;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (f->filp_count != 0 && f->filp_vno != NULL &&
S_ISSOCK(f->filp_vno->v_mode) && f->filp_vno->v_sdev == dev &&
f->filp_mode != FILP_CLOSED) {
return f;
}
}
return NULL;
}
/*===========================================================================*
* invalidate_filp *
*===========================================================================*/
void invalidate_filp(struct filp *rfilp)
{
/* Invalidate filp. */
rfilp->filp_mode = FILP_CLOSED;
}
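/* Note (added for clarity): invalidation merely marks the filp as
 * FILP_CLOSED; the effect is indirect. get_filp2() subsequently refuses all
 * uses of the filp with EIO except for close(2), and close_filp() skips the
 * device close for a filp already marked FILP_CLOSED.
 */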
/*===========================================================================*
* invalidate_filp_by_char_major *
*===========================================================================*/
void invalidate_filp_by_char_major(devmajor_t major)
{
struct filp *f;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (f->filp_count != 0 && f->filp_vno != NULL) {
if (major(f->filp_vno->v_sdev) == major &&
S_ISCHR(f->filp_vno->v_mode)) {
invalidate_filp(f);
}
}
}
}
/*===========================================================================*
* invalidate_filp_by_sock_drv *
*===========================================================================*/
void invalidate_filp_by_sock_drv(unsigned int num)
{
/* Invalidate all file pointers for sockets owned by the socket driver with the
* smap number 'num'.
*/
struct filp *f;
struct smap *sp;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (f->filp_count != 0 && f->filp_vno != NULL) {
if (S_ISSOCK(f->filp_vno->v_mode) &&
(sp = get_smap_by_dev(f->filp_vno->v_sdev, NULL)) != NULL
&& sp->smap_num == num)
invalidate_filp(f);
}
}
}
/*===========================================================================*
* invalidate_filp_by_endpt *
*===========================================================================*/
void invalidate_filp_by_endpt(endpoint_t proc_e)
{
struct filp *f;
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
if (f->filp_count != 0 && f->filp_vno != NULL) {
if (f->filp_vno->v_fs_e == proc_e)
invalidate_filp(f);
}
}
}
/*===========================================================================*
* lock_filp *
*===========================================================================*/
void
lock_filp(struct filp *filp, tll_access_t locktype)
{
struct worker_thread *org_self;
struct vnode *vp;
assert(filp->filp_count > 0);
vp = filp->filp_vno;
assert(vp != NULL);
/* Lock vnode only if we haven't already locked it. If already locked by us,
* we're allowed to have one additional 'soft' lock. */
if (tll_locked_by_me(&vp->v_lock)) {
assert(filp->filp_softlock == NULL);
filp->filp_softlock = fp;
} else {
/* We have to make an exception for vnodes belonging to pipes. Even
* read(2) operations on pipes change the vnode and therefore require
* exclusive access.
*/
if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
locktype = VNODE_WRITE;
lock_vnode(vp, locktype);
}
assert(vp->v_ref_count > 0); /* vnode still in use? */
assert(filp->filp_vno == vp); /* vnode still what we think it is? */
/* First try to get filp lock right off the bat */
if (mutex_trylock(&filp->filp_lock) != 0) {
/* Already in use, let's wait for our turn */
org_self = worker_suspend();
if (mutex_lock(&filp->filp_lock) != 0)
panic("unable to obtain lock on filp");
worker_resume(org_self);
}
}
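/* Note (added for clarity): lock_filp() thus acquires the vnode lock (or
 * records a soft lock) before taking the filp mutex, and it suspends the
 * current worker thread while blocking on that mutex so that other jobs can
 * be serviced in the meantime. unlock_filp() releases the vnode lock first
 * and the filp mutex last.
 */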
/*===========================================================================*
* unlock_filp *
*===========================================================================*/
void
unlock_filp(struct filp *filp)
{
/* If this filp holds a soft lock on the vnode, we must be the owner */
if (filp->filp_softlock != NULL)
assert(filp->filp_softlock == fp);
if (filp->filp_count > 0) {
/* Only unlock vnode if filp is still in use */
/* and if we don't hold a soft lock */
if (filp->filp_softlock == NULL) {
assert(tll_islocked(&(filp->filp_vno->v_lock)));
unlock_vnode(filp->filp_vno);
}
}
filp->filp_softlock = NULL;
if (mutex_unlock(&filp->filp_lock) != 0)
panic("unable to release lock on filp");
}
/*===========================================================================*
* unlock_filps *
*===========================================================================*/
void
unlock_filps(struct filp *filp1, struct filp *filp2)
{
/* Unlock two filps that are tied to the same vnode. As a thread can lock a
* vnode only once, unlocking the vnode twice would result in an error. */
/* No NULL pointers and not equal */
assert(filp1);
assert(filp2);
assert(filp1 != filp2);
/* Must be tied to the same vnode and not NULL */
assert(filp1->filp_vno == filp2->filp_vno);
assert(filp1->filp_vno != NULL);
if (filp1->filp_count > 0 && filp2->filp_count > 0) {
/* Only unlock vnode if filps are still in use */
unlock_vnode(filp1->filp_vno);
}
filp1->filp_softlock = NULL;
filp2->filp_softlock = NULL;
if (mutex_unlock(&filp2->filp_lock) != 0)
panic("unable to release filp lock on filp2");
if (mutex_unlock(&filp1->filp_lock) != 0)
panic("unable to release filp lock on filp1");
}
/*===========================================================================*
* close_filp *
*===========================================================================*/
int
close_filp(struct filp * f, int may_suspend)
{
/* Close a file. Will also unlock the filp when done. The 'may_suspend' flag
 * indicates whether the current process may be suspended while closing a
 * socket. That is currently supported only when the user issued a close(2),
 * and only in that case may this function return SUSPEND instead of OK. In
 * all other cases, this function always returns OK; it never returns another
 * error code, for reasons explained below.
*/
int r, rw;
dev_t dev;
struct vnode *vp;
/* Must be locked */
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
assert(tll_islocked(&f->filp_vno->v_lock));
vp = f->filp_vno;
r = OK;
if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
/* Check to see if the file is special. */
if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode) ||
S_ISSOCK(vp->v_mode)) {
dev = vp->v_sdev;
if (S_ISBLK(vp->v_mode)) {
lock_bsf();
if (vp->v_bfs_e == ROOT_FS_E && dev != ROOT_DEV) {
/* Invalidate the cache unless the special is
* mounted. Be careful not to flush the root
* file system either.
*/
(void) req_flush(vp->v_bfs_e, dev);
}
unlock_bsf();
(void) bdev_close(dev); /* Ignore errors */
} else if (S_ISCHR(vp->v_mode)) {
(void) cdev_close(dev); /* Ignore errors */
} else {
/*
* Sockets may take a while to be closed (SO_LINGER),
* and thus, we may issue a suspending close to a
* socket driver. Getting this working for close(2) is
* the easy case, and that is all we support for now.
 * However, there is also e.g. dup2(2), which, if
 * interrupted by a signal, should technically fail
* without closing the file descriptor. Then there are
* cases where the close should never block: process
* exit and close-on-exec for example. Getting all
* such cases right is left to future work; currently
* they all perform thread-blocking socket closes and
* thus cause the socket to perform lingering in the
* background if at all.
*/
assert(!may_suspend || job_call_nr == VFS_CLOSE);
if (f->filp_flags & O_NONBLOCK)
may_suspend = FALSE;
r = sdev_close(dev, may_suspend);
/*
* Returning a non-OK error is a bad idea, because it
* will leave the application wondering whether closing
* the file descriptor actually succeeded.
*/
if (r != SUSPEND)
r = OK;
}
f->filp_mode = FILP_CLOSED;
}
}
/* If the inode being closed is a pipe, release everyone hanging on it. */
if (S_ISFIFO(vp->v_mode)) {
rw = (f->filp_mode & R_BIT ? VFS_WRITE : VFS_READ);
release(vp, rw, susp_count);
}
if (--f->filp_count == 0) {
if (S_ISFIFO(vp->v_mode)) {
/* Last reader or writer is going. Tell PFS about latest
* pipe size.
*/
truncate_vnode(vp, vp->v_size);
}
unlock_vnode(f->filp_vno);
put_vnode(f->filp_vno);
f->filp_vno = NULL;
f->filp_mode = FILP_CLOSED;
f->filp_count = 0;
} else if (f->filp_count < 0) {
panic("VFS: invalid filp count: %d ino %llx/%llu", f->filp_count,
vp->v_dev, vp->v_inode_nr);
} else {
unlock_vnode(f->filp_vno);
}
mutex_unlock(&f->filp_lock);
return r;
}
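/* Usage sketch (not from the original source): a close(2) handler would
 * detach the descriptor before closing the filp, so that the descriptor slot
 * is free again even if the close suspends:
 *
 *     struct filp *f;
 *     if ((f = get_filp(fd, VNODE_OPCL)) == NULL)
 *             return(err_code);
 *     fp->fp_filp[fd] = NULL;
 *     return close_filp(f, TRUE);  (may return SUSPEND for sockets)
 */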
/*===========================================================================*
* do_copyfd *
*===========================================================================*/
int do_copyfd(void)
{
/* Copy a file descriptor between processes, or close a remote file descriptor.
 * This call is used as a back-call by device drivers (UDS, VND), and is
 * expected to be used in response to either an IOCTL to VND or a SEND or
 * RECV socket request to UDS.
*/
struct fproc *rfp;
struct filp *rfilp;
struct vnode *vp;
struct smap *sp;
endpoint_t endpt;
int r, fd, what, flags, slot;
/* This should be replaced with an ACL check. */
if (!super_user) return(EPERM);
endpt = job_m_in.m_lsys_vfs_copyfd.endpt;
fd = job_m_in.m_lsys_vfs_copyfd.fd;
what = job_m_in.m_lsys_vfs_copyfd.what;
flags = what & COPYFD_FLAGS;
what &= ~COPYFD_FLAGS;
if (isokendpt(endpt, &slot) != OK) return(EINVAL);
rfp = &fproc[slot];
/* FIXME: we should now check that the user process is indeed blocked on an
* IOCTL or socket call, so that we can safely mess with its file
* descriptors. We currently do not have the necessary state to verify this,
* so we assume that the call is always used in the right way.
*/
/* Depending on the operation, get the file descriptor from the caller or the
* user process. Do not lock the filp yet: we first need to make sure that
* locking it will not result in a deadlock.
*/
rfilp = get_filp2((what == COPYFD_TO) ? fp : rfp, fd, VNODE_NONE);
if (rfilp == NULL)
return(err_code);
/* If the filp is involved in an IOCTL by the user process, locking the filp
* here would result in a deadlock. This would happen if a user process
* passes in the file descriptor to the device node on which it is performing
* the IOCTL. We do not allow manipulation of such device nodes. In
* practice, this only applies to block-special files (and thus VND), because
* socket files (as used by UDS) are unlocked during the socket operation.
*/
if (rfilp->filp_ioctl_fp == rfp)
return(EBADF);
/* Now we can safely lock the filp, copy or close it, and unlock it again. */
lock_filp(rfilp, VNODE_READ);
switch (what) {
case COPYFD_FROM:
/*
* If the caller is a socket driver (namely, UDS) and the file
* descriptor being copied in is a socket for that socket driver, then
* deny the call, because of at least two known issues. Both issues
* are related to UDS having an in-flight file descriptor that is the
* last reference to a UDS socket:
*
* 1) if UDS tries to close the file descriptor, this will prompt VFS
* to close the underlying object, which is a UDS socket. As a
* result, while UDS is blocked in the close(2), VFS will try to
* send a request to UDS to close the socket. This results in a
* deadlock of the UDS service.
*
* 2) if a file descriptor for a UDS socket is sent across that same
* UDS socket, the socket will remain referenced by UDS, thus open
* in VFS, and therefore also open in UDS. The socket and file
* descriptor will both remain in use for the rest of UDS' lifetime.
* This can easily result in denial-of-service in the UDS service.
* The same problem can be triggered using multiple sockets that
* have in-flight file descriptors referencing each other.
*
* A proper solution for these problems may consist of some form of
* "soft reference counting" where VFS does not count UDS having a
* filp open as a real reference. That is tricky business, so for now
* we prevent any such problems with the check here.
*/
if ((vp = rfilp->filp_vno) != NULL && S_ISSOCK(vp->v_mode) &&
(sp = get_smap_by_dev(vp->v_sdev, NULL)) != NULL &&
sp->smap_endpt == who_e) {
r = EDEADLK;
break;
}
rfp = fp;
flags &= ~COPYFD_CLOEXEC;
/* FALLTHROUGH */
case COPYFD_TO:
/* Find a free file descriptor slot in the local or remote process. */
for (fd = 0; fd < OPEN_MAX; fd++)
if (rfp->fp_filp[fd] == NULL)
break;
/* If found, fill the slot and return the slot number. */
if (fd < OPEN_MAX) {
rfp->fp_filp[fd] = rfilp;
if (flags & COPYFD_CLOEXEC)
FD_SET(fd, &rfp->fp_cloexec_set);
rfilp->filp_count++;
r = fd;
} else
r = EMFILE;
break;
case COPYFD_CLOSE:
/* This should be used ONLY to revert a successful copy-to operation,
* and assumes that the filp is still in use by the caller as well.
*/
if (rfilp->filp_count > 1) {
rfilp->filp_count--;
rfp->fp_filp[fd] = NULL;
r = OK;
} else
r = EBADF;
break;
default:
r = EINVAL;
}
unlock_filp(rfilp);
return(r);
}