/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following definitions.
- readsb
- bzero
- balloc
- bfree
- iinit
- ialloc
- iupdate
- iget
- idup
- ilock
- iunlock
- iput
- iunlockput
- bmap
- itrunc
- stati
- readi
- writei
- namecmp
- dirlookup
- dirlink
- skipelem
- namex
- namei
- nameiparent
1 // File system implementation. Five layers:
2 // + Blocks: allocator for raw disk blocks.
3 // + Log: crash recovery for multi-step updates.
4 // + Files: inode allocator, reading, writing, metadata.
5 // + Directories: inode with special contents (list of other inodes!)
6 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
7 //
8 // This file contains the low-level file system manipulation
9 // routines. The (higher-level) system call implementations
10 // are in sysfile.c.
11
12 #include "types.h"
13 #include "defs.h"
14 #include "param.h"
15 #include "stat.h"
16 #include "mmu.h"
17 #include "proc.h"
18 #include "spinlock.h"
19 #include "fs.h"
20 #include "buf.h"
21 #include "file.h"
22
23 #define min(a, b) ((a) < (b) ? (a) : (b))
24 static void itrunc(struct inode*);
25 struct superblock sb; // there should be one per dev, but we run with one dev
26
27 // Read the super block.
28 void
29 readsb(int dev, struct superblock *sb)
30 {
31 struct buf *bp;
32
33 bp = bread(dev, 1);
34 memmove(sb, bp->data, sizeof(*sb));
35 brelse(bp);
36 }
37
38 // Zero a block.
39 static void
40 bzero(int dev, int bno)
41 {
42 struct buf *bp;
43
44 bp = bread(dev, bno);
45 memset(bp->data, 0, BSIZE);
46 log_write(bp);
47 brelse(bp);
48 }
49
50 // Blocks.
51
52 // Allocate a zeroed disk block.
53 static uint
54 balloc(uint dev)
55 {
56 int b, bi, m;
57 struct buf *bp;
58
59 bp = 0;
60 for(b = 0; b < sb.size; b += BPB){
61 bp = bread(dev, BBLOCK(b, sb));
62 for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
63 m = 1 << (bi % 8);
64 if((bp->data[bi/8] & m) == 0){ // Is block free?
65 bp->data[bi/8] |= m; // Mark block in use.
66 log_write(bp);
67 brelse(bp);
68 bzero(dev, b + bi);
69 return b + bi;
70 }
71 }
72 brelse(bp);
73 }
74 panic("balloc: out of blocks");
75 }
76
77 // Free a disk block.
78 static void
79 bfree(int dev, uint b)
80 {
81 struct buf *bp;
82 int bi, m;
83
84 readsb(dev, &sb);
85 bp = bread(dev, BBLOCK(b, sb));
86 bi = b % BPB;
87 m = 1 << (bi % 8);
88 if((bp->data[bi/8] & m) == 0)
89 panic("freeing free block");
90 bp->data[bi/8] &= ~m;
91 log_write(bp);
92 brelse(bp);
93 }
94
95 // Inodes.
96 //
97 // An inode describes a single unnamed file.
98 // The inode disk structure holds metadata: the file's type,
99 // its size, the number of links referring to it, and the
100 // list of blocks holding the file's content.
101 //
102 // The inodes are laid out sequentially on disk at
103 // sb.startinode. Each inode has a number, indicating its
104 // position on the disk.
105 //
106 // The kernel keeps a cache of in-use inodes in memory
107 // to provide a place for synchronizing access
108 // to inodes used by multiple processes. The cached
109 // inodes include book-keeping information that is
110 // not stored on disk: ip->ref and ip->flags.
111 //
112 // An inode and its in-memory represtative go through a
113 // sequence of states before they can be used by the
114 // rest of the file system code.
115 //
116 // * Allocation: an inode is allocated if its type (on disk)
117 // is non-zero. ialloc() allocates, iput() frees if
118 // the link count has fallen to zero.
119 //
120 // * Referencing in cache: an entry in the inode cache
121 // is free if ip->ref is zero. Otherwise ip->ref tracks
122 // the number of in-memory pointers to the entry (open
123 // files and current directories). iget() to find or
124 // create a cache entry and increment its ref, iput()
125 // to decrement ref.
126 //
127 // * Valid: the information (type, size, &c) in an inode
128 // cache entry is only correct when the I_VALID bit
129 // is set in ip->flags. ilock() reads the inode from
130 // the disk and sets I_VALID, while iput() clears
131 // I_VALID if ip->ref has fallen to zero.
132 //
133 // * Locked: file system code may only examine and modify
134 // the information in an inode and its content if it
135 // has first locked the inode. The I_BUSY flag indicates
136 // that the inode is locked. ilock() sets I_BUSY,
137 // while iunlock clears it.
138 //
139 // Thus a typical sequence is:
140 // ip = iget(dev, inum)
141 // ilock(ip)
142 // ... examine and modify ip->xxx ...
143 // iunlock(ip)
144 // iput(ip)
145 //
146 // ilock() is separate from iget() so that system calls can
147 // get a long-term reference to an inode (as for an open file)
148 // and only lock it for short periods (e.g., in read()).
149 // The separation also helps avoid deadlock and races during
150 // pathname lookup. iget() increments ip->ref so that the inode
151 // stays cached and pointers to it remain valid.
152 //
153 // Many internal file system functions expect the caller to
154 // have locked the inodes involved; this lets callers create
155 // multi-step atomic operations.
156
157 struct {
158 struct spinlock lock;
159 struct inode inode[NINODE];
160 } icache;
161
162 void
163 iinit(int dev)
164 {
165 initlock(&icache.lock, "icache");
166 readsb(dev, &sb);
167 cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d inodestart %d bmap start %d\n", sb.size,
168 sb.nblocks, sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, sb.bmapstart);
169 }
170
171 static struct inode* iget(uint dev, uint inum);
172
173 //PAGEBREAK!
174 // Allocate a new inode with the given type on device dev.
175 // A free inode has a type of zero.
176 struct inode*
177 ialloc(uint dev, short type)
178 {
179 int inum;
180 struct buf *bp;
181 struct dinode *dip;
182
183 for(inum = 1; inum < sb.ninodes; inum++){
184 bp = bread(dev, IBLOCK(inum, sb));
185 dip = (struct dinode*)bp->data + inum%IPB;
186 if(dip->type == 0){ // a free inode
187 memset(dip, 0, sizeof(*dip));
188 dip->type = type;
189 log_write(bp); // mark it allocated on the disk
190 brelse(bp);
191 return iget(dev, inum);
192 }
193 brelse(bp);
194 }
195 panic("ialloc: no inodes");
196 }
197
198 // Copy a modified in-memory inode to disk.
199 void
200 iupdate(struct inode *ip)
201 {
202 struct buf *bp;
203 struct dinode *dip;
204
205 bp = bread(ip->dev, IBLOCK(ip->inum, sb));
206 dip = (struct dinode*)bp->data + ip->inum%IPB;
207 dip->type = ip->type;
208 dip->major = ip->major;
209 dip->minor = ip->minor;
210 dip->nlink = ip->nlink;
211 dip->size = ip->size;
212 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
213 log_write(bp);
214 brelse(bp);
215 }
216
217 // Find the inode with number inum on device dev
218 // and return the in-memory copy. Does not lock
219 // the inode and does not read it from disk.
220 static struct inode*
221 iget(uint dev, uint inum)
222 {
223 struct inode *ip, *empty;
224
225 acquire(&icache.lock);
226
227 // Is the inode already cached?
228 empty = 0;
229 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
230 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
231 ip->ref++;
232 release(&icache.lock);
233 return ip;
234 }
235 if(empty == 0 && ip->ref == 0) // Remember empty slot.
236 empty = ip;
237 }
238
239 // Recycle an inode cache entry.
240 if(empty == 0)
241 panic("iget: no inodes");
242
243 ip = empty;
244 ip->dev = dev;
245 ip->inum = inum;
246 ip->ref = 1;
247 ip->flags = 0;
248 release(&icache.lock);
249
250 return ip;
251 }
252
253 // Increment reference count for ip.
254 // Returns ip to enable ip = idup(ip1) idiom.
255 struct inode*
256 idup(struct inode *ip)
257 {
258 acquire(&icache.lock);
259 ip->ref++;
260 release(&icache.lock);
261 return ip;
262 }
263
264 // Lock the given inode.
265 // Reads the inode from disk if necessary.
266 void
267 ilock(struct inode *ip)
268 {
269 struct buf *bp;
270 struct dinode *dip;
271
272 if(ip == 0 || ip->ref < 1)
273 panic("ilock");
274
275 acquire(&icache.lock);
276 while(ip->flags & I_BUSY)
277 sleep(ip, &icache.lock);
278 ip->flags |= I_BUSY;
279 release(&icache.lock);
280
281 if(!(ip->flags & I_VALID)){
282 bp = bread(ip->dev, IBLOCK(ip->inum, sb));
283 dip = (struct dinode*)bp->data + ip->inum%IPB;
284 ip->type = dip->type;
285 ip->major = dip->major;
286 ip->minor = dip->minor;
287 ip->nlink = dip->nlink;
288 ip->size = dip->size;
289 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
290 brelse(bp);
291 ip->flags |= I_VALID;
292 if(ip->type == 0)
293 panic("ilock: no type");
294 }
295 }
296
297 // Unlock the given inode.
298 void
299 iunlock(struct inode *ip)
300 {
301 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
302 panic("iunlock");
303
304 acquire(&icache.lock);
305 ip->flags &= ~I_BUSY;
306 wakeup(ip);
307 release(&icache.lock);
308 }
309
310 // Drop a reference to an in-memory inode.
311 // If that was the last reference, the inode cache entry can
312 // be recycled.
313 // If that was the last reference and the inode has no links
314 // to it, free the inode (and its content) on disk.
315 // All calls to iput() must be inside a transaction in
316 // case it has to free the inode.
317 void
318 iput(struct inode *ip)
319 {
320 acquire(&icache.lock);
321 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
322 // inode has no links and no other references: truncate and free.
323 if(ip->flags & I_BUSY)
324 panic("iput busy");
325 ip->flags |= I_BUSY;
326 release(&icache.lock);
327 itrunc(ip);
328 ip->type = 0;
329 iupdate(ip);
330 acquire(&icache.lock);
331 ip->flags = 0;
332 wakeup(ip);
333 }
334 ip->ref--;
335 release(&icache.lock);
336 }
337
338 // Common idiom: unlock, then put.
339 void
340 iunlockput(struct inode *ip)
341 {
342 iunlock(ip);
343 iput(ip);
344 }
345
346 //PAGEBREAK!
347 // Inode content
348 //
349 // The content (data) associated with each inode is stored
350 // in blocks on the disk. The first NDIRECT block numbers
351 // are listed in ip->addrs[]. The next NINDIRECT blocks are
352 // listed in block ip->addrs[NDIRECT].
353
354 // Return the disk block address of the nth block in inode ip.
355 // If there is no such block, bmap allocates one.
356 static uint
357 bmap(struct inode *ip, uint bn)
358 {
359 uint addr, *a;
360 struct buf *bp;
361
362 if(bn < NDIRECT){
363 if((addr = ip->addrs[bn]) == 0)
364 ip->addrs[bn] = addr = balloc(ip->dev);
365 return addr;
366 }
367 bn -= NDIRECT;
368
369 if(bn < NINDIRECT){
370 // Load indirect block, allocating if necessary.
371 if((addr = ip->addrs[NDIRECT]) == 0)
372 ip->addrs[NDIRECT] = addr = balloc(ip->dev);
373 bp = bread(ip->dev, addr);
374 a = (uint*)bp->data;
375 if((addr = a[bn]) == 0){
376 a[bn] = addr = balloc(ip->dev);
377 log_write(bp);
378 }
379 brelse(bp);
380 return addr;
381 }
382
383 panic("bmap: out of range");
384 }
385
386 // Truncate inode (discard contents).
387 // Only called when the inode has no links
388 // to it (no directory entries referring to it)
389 // and has no in-memory reference to it (is
390 // not an open file or current directory).
391 static void
392 itrunc(struct inode *ip)
393 {
394 int i, j;
395 struct buf *bp;
396 uint *a;
397
398 for(i = 0; i < NDIRECT; i++){
399 if(ip->addrs[i]){
400 bfree(ip->dev, ip->addrs[i]);
401 ip->addrs[i] = 0;
402 }
403 }
404
405 if(ip->addrs[NDIRECT]){
406 bp = bread(ip->dev, ip->addrs[NDIRECT]);
407 a = (uint*)bp->data;
408 for(j = 0; j < NINDIRECT; j++){
409 if(a[j])
410 bfree(ip->dev, a[j]);
411 }
412 brelse(bp);
413 bfree(ip->dev, ip->addrs[NDIRECT]);
414 ip->addrs[NDIRECT] = 0;
415 }
416
417 ip->size = 0;
418 iupdate(ip);
419 }
420
421 // Copy stat information from inode.
422 void
423 stati(struct inode *ip, struct stat *st)
424 {
425 st->dev = ip->dev;
426 st->ino = ip->inum;
427 st->type = ip->type;
428 st->nlink = ip->nlink;
429 st->size = ip->size;
430 }
431
432 //PAGEBREAK!
433 // Read data from inode.
434 int
435 readi(struct inode *ip, char *dst, uint off, uint n)
436 {
437 uint tot, m;
438 struct buf *bp;
439
440 if(ip->type == T_DEV){
441 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
442 return -1;
443 return devsw[ip->major].read(ip, dst, n);
444 }
445
446 if(off > ip->size || off + n < off)
447 return -1;
448 if(off + n > ip->size)
449 n = ip->size - off;
450
451 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
452 bp = bread(ip->dev, bmap(ip, off/BSIZE));
453 m = min(n - tot, BSIZE - off%BSIZE);
454 memmove(dst, bp->data + off%BSIZE, m);
455 brelse(bp);
456 }
457 return n;
458 }
459
460 // PAGEBREAK!
461 // Write data to inode.
462 int
463 writei(struct inode *ip, char *src, uint off, uint n)
464 {
465 uint tot, m;
466 struct buf *bp;
467
468 if(ip->type == T_DEV){
469 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
470 return -1;
471 return devsw[ip->major].write(ip, src, n);
472 }
473
474 if(off > ip->size || off + n < off)
475 return -1;
476 if(off + n > MAXFILE*BSIZE)
477 return -1;
478
479 for(tot=0; tot<n; tot+=m, off+=m, src+=m){
480 bp = bread(ip->dev, bmap(ip, off/BSIZE));
481 m = min(n - tot, BSIZE - off%BSIZE);
482 memmove(bp->data + off%BSIZE, src, m);
483 log_write(bp);
484 brelse(bp);
485 }
486
487 if(n > 0 && off > ip->size){
488 ip->size = off;
489 iupdate(ip);
490 }
491 return n;
492 }
493
494 //PAGEBREAK!
495 // Directories
496
497 int
498 namecmp(const char *s, const char *t)
499 {
500 return strncmp(s, t, DIRSIZ);
501 }
502
503 // Look for a directory entry in a directory.
504 // If found, set *poff to byte offset of entry.
505 struct inode*
506 dirlookup(struct inode *dp, char *name, uint *poff)
507 {
508 uint off, inum;
509 struct dirent de;
510
511 if(dp->type != T_DIR)
512 panic("dirlookup not DIR");
513
514 for(off = 0; off < dp->size; off += sizeof(de)){
515 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
516 panic("dirlink read");
517 if(de.inum == 0)
518 continue;
519 if(namecmp(name, de.name) == 0){
520 // entry matches path element
521 if(poff)
522 *poff = off;
523 inum = de.inum;
524 return iget(dp->dev, inum);
525 }
526 }
527
528 return 0;
529 }
530
531 // Write a new directory entry (name, inum) into the directory dp.
532 int
533 dirlink(struct inode *dp, char *name, uint inum)
534 {
535 int off;
536 struct dirent de;
537 struct inode *ip;
538
539 // Check that name is not present.
540 if((ip = dirlookup(dp, name, 0)) != 0){
541 iput(ip);
542 return -1;
543 }
544
545 // Look for an empty dirent.
546 for(off = 0; off < dp->size; off += sizeof(de)){
547 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
548 panic("dirlink read");
549 if(de.inum == 0)
550 break;
551 }
552
553 strncpy(de.name, name, DIRSIZ);
554 de.inum = inum;
555 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
556 panic("dirlink");
557
558 return 0;
559 }
560
561 //PAGEBREAK!
562 // Paths
563
564 // Copy the next path element from path into name.
565 // Return a pointer to the element following the copied one.
566 // The returned path has no leading slashes,
567 // so the caller can check *path=='\0' to see if the name is the last one.
568 // If no name to remove, return 0.
569 //
570 // Examples:
571 // skipelem("a/bb/c", name) = "bb/c", setting name = "a"
572 // skipelem("///a//bb", name) = "bb", setting name = "a"
573 // skipelem("a", name) = "", setting name = "a"
574 // skipelem("", name) = skipelem("////", name) = 0
575 //
576 static char*
577 skipelem(char *path, char *name)
578 {
579 char *s;
580 int len;
581
582 while(*path == '/')
583 path++;
584 if(*path == 0)
585 return 0;
586 s = path;
587 while(*path != '/' && *path != 0)
588 path++;
589 len = path - s;
590 if(len >= DIRSIZ)
591 memmove(name, s, DIRSIZ);
592 else {
593 memmove(name, s, len);
594 name[len] = 0;
595 }
596 while(*path == '/')
597 path++;
598 return path;
599 }
600
601 // Look up and return the inode for a path name.
602 // If parent != 0, return the inode for the parent and copy the final
603 // path element into name, which must have room for DIRSIZ bytes.
604 // Must be called inside a transaction since it calls iput().
605 static struct inode*
606 namex(char *path, int nameiparent, char *name)
607 {
608 struct inode *ip, *next;
609
610 if(*path == '/')
611 ip = iget(ROOTDEV, ROOTINO);
612 else
613 ip = idup(proc->cwd);
614
615 while((path = skipelem(path, name)) != 0){
616 ilock(ip);
617 if(ip->type != T_DIR){
618 iunlockput(ip);
619 return 0;
620 }
621 if(nameiparent && *path == '\0'){
622 // Stop one level early.
623 iunlock(ip);
624 return ip;
625 }
626 if((next = dirlookup(ip, name, 0)) == 0){
627 iunlockput(ip);
628 return 0;
629 }
630 iunlockput(ip);
631 ip = next;
632 }
633 if(nameiparent){
634 iput(ip);
635 return 0;
636 }
637 return ip;
638 }
639
640 struct inode*
641 namei(char *path)
642 {
643 char name[DIRSIZ];
644 return namex(path, 0, name);
645 }
646
647 struct inode*
648 nameiparent(char *path, char *name)
649 {
650 return namex(path, 1, name);
651 }