The C Programming Language - UNIX system interface

Edusagar - notes - UNIX system interface
  • Everything is a file in unix.

    In unix, if you want to open a file using open() system call returns back with a non-negative integer known as file descriptor(similar to FILE * returned by fopen()). Whenever a program is running, three files are already open - standard input(0), standard output(1) and standard error(2).

  • prog < infile > outfile

    I/O redirections like are achieved by shell changing the standard file descriptors and modifying them to point to named files. The program doesnt know where its input comes from and where its output goes to, as long as it uses file 0 for input and 1 for output.

  • int n_read = read(int fd, char *buf, int n);
    int n_written = write(int fd, char *buf, int n);

    Each call returns a count of the number of bytes transferred. On reading, the number of bytes returned may be less than the number requested. A return value of zero bytes implies end of file, and -1 indicates an error of some sort. For writing, the return value is the number of bytes written; an error has occurred if this isn't equal to the number requested.

  • Reading/Writing large sizes in one system call is efficient because fewer system calls will be made.

  • #include "syscalls.h"
    /* getchar: unbuffered single character input */
    int getchar(void)
    {
    	char c;
    	return (read(0, &c, 1) == 1) ? (unsigned char) c : EOF;
    }
    
    
    #include "syscalls.h"
    /*getchar: simple buffered version */
    int getchar(void)
    {
    	static char buf[BUFSIZE];
    	static char *bufp = buf;
    	static int n = 0;
    	
    	if (n == 0) {
    		n = read(0, buf, sizeof(buf));
    		bufp = &buf[0];
    	}
    	
    	return (--n >= 0) ? (unsigned char) *bufp++ : EOF;
    }

    If these versions of getchar were to be compiled with <stdio.h> included, it would be necessary to #undef the name getchar in case it is implemented as a macro.

  • #include <fcntl.h>
    int fd;
    int open(char *name, int flags, int perms);
    fd = open(name, flags, perms);

    The name argument is a character string containing the filename. The second argument, flags, is an int that specifies how the file is to be opened; the main values are:

    • O_RDONLY open for reading only
    • O_WRONLY open for writing only
    • O_RDWR open for both reading and writing

    These constants are defined in <fcntl.h> on System V UNIX systems, and in <sys/file.h> on Berkeley (BSD) versions.

  • It is an error to try to open a file that does not exist. The system call creat is provided to create new files, or to re-write old ones. If the file already exists, it will truncate the file to length zero.

    int creat(char *name, int perms);

    fd = creat(name, perms);

  • In Unix file system, there are 9 bits of permission information associated with each file that control read, write and execute access for the owner of the file, for owner's group and for all others. e.g. 0755 (octal value) specifies read, write and execute permissions for the owner, and read and execute permissions for the group and other users.

  • The standard library function vprintf is like printf except that the variable argument list is replaced by a single argument that has been initialized by calling the va_start macro. Similarly, vfprintf and vsprintf match fprintf and sprintf.

  • #include <stdio.h>
    #include <stdarg.h>
    /* error: print an error message and die */
    void error(char *fmt, ...)
    {
    	va_list args;
    	va_start(args, fmt);
    	fprintf(stderr, "error: ");
    	vprintf(stderr, fmt, args);
    	fprintf(stderr, "\n");
    	va_end(args);
    	exit(1);
    }
  • close(fd) is used to free the resources attached with an open file.

    The function unlink(char *name) removes the file name from the file system. It corresponds to the standard library function remove.

  • The system call lseek provides a way to move around in a file without reading or writing any data:

    long lseek(int fd, long offset, int origin);

    above call sets the current position in the file whose descriptor is fd to offset, which is taken relative to the location specified by origin. origin can be 0, 1, or 2 to specify that offset is to be measured from begining, current position or from the end of the file respectively.

  • getchar and putchar implementation

                    #define NULL 0
    #define EOF (-1)
    #define BUFSIZE 1024
    #define OPEN_MAX 20 /* max #files open at once */
    
    typedef struct _iobuf {
    	int cnt; 	/* characters left */
    	char *ptr; 	/* next character position */
    	char *base; /* location of buffer */
    	int flag; 	/* mode of file access */
    	int fd; 	/* file descriptor */
    } FILE;
    
    extern FILE _iob[OPEN_MAX];
    
    #define stdin (&_iob[0])
    #define stdout (&_iob[1])
    #define stderr  (&_iob[2])
    
    enum _flags {
    	_READ  = 01   /* file open for reading */
    	_WRITE = 02   /* file open for writing */
    	_UNBUF = 04   /* file is unbuffered */
    	_EOF   = 010  /* EOF has occurred on this file */
    	_ERR   = 020  /* error occurred on this file */
    };
    
    int _fillbuf(FILE *);
    int _flushbuf(int, FILE *);
    
    #define feof(p)   ((p)->flag & _EOF) != 0)
    #define ferror(p) ((p)->flag & _ERR) != 0)
    #define fileno(p) ((p)->fd)
    
    #define getc(p) (--(p)->cnt >= 0 \
    				? (unsigned char) *(p)>ptr++ : _fillbuf(p))
    #define putc(x,p) (--(p)->cnt >= 0 \
    				? *(p)->ptr++ = (x) : _flushbuf((x),p))
    #define getchar() getc(stdin)
    #define putchar(x) putc((x), stdout)
  • fopen() using standard system calls

    #include <fcntl.h>
    #include "syscalls.h"
    #define PERMS 0666
    /* RW for owner, group, others */
    FILE *fopen(char *name, char *mode)
    {
    	int fd;
    	FILE *fp;
    
    	if (*mode != 'r' && *mode != 'w' && *mode != 'a')
    		return NULL;
    	for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
    		if ((fp->flag & (_READ | _WRITE)) == 0)
    			break; 	/* found free slot */
    	if (fp >= _iob + OPEN_MAX) /* no free slots */
    		return NULL;
    
    	if (*mode == 'w')
    		fd = creat(name, PERMS);
    	else if (*mode == 'a') {
    		if ((fd = open(name, O_WRONLY, 0)) == -1)
    			fd = creat(name, PERMS);
    		lseek(fd, 0L, 2);
    	} else
    		fd = open(name, O_RDONLY, 0);
    	
    	if (fd == -1) 	/* couldn't access name */
    		return NULL;
    	
    	fp->fd = fd;
    	fp->cnt = 0;
    	fp->base = NULL;
    	fp->flag = (*mode == 'r') ? _READ : _WRITE;
    	return fp;
    }
    
  • FILE _iob[OPEN_MAX] = {
    	/* stdin, stdout, stderr */
    	{ 0, (char *) 0, (char *) 0, _READ, 0 },
    	{ 0, (char *) 0, (char *) 0, _WRITE, 1 },
    	{ 0, (char *) 0, (char *) 0, _WRITE | _UNBUF, 2 }
    };
  • The inode for a file is where all information about the file except its name is kept. A directory entry generally consists of only two items, the filename and an inode number.

  • The system call stat takes a filename and returns all of the information in the inode for that file, or -1 if there is an error. That is,


    char *name;
    struct stat stbuf;
    int stat(char *, struct stat *);
    stat(name, &stbuf);
  • list recursively all the files in a directory

    int stat(char *, struct stat *);
    void dirwalk(char *, void (*fcn)(char *));
    /* fsize: print the name of file "name" along with its size */
    void fsize(char *name)
    {
    	struct stat stbuf;
    	if (stat(name, &stbuf) == -1) {
    		fprintf(stderr, "fsize: can't access %s\n", name);
    		return;
    	}
    	if ((stbuf.st_mode & S_IFMT) == S_IFDIR)
    		dirwalk(name, fsize);
    	printf("%8ld %s\n", stbuf.st_size, name);
    }
    
    #define MAX_PATH 1024
    /* dirwalk: apply fcn to all files in dir */
    void dirwalk(char *dir, void (*fcn)(char *))
    {
    	char name[MAX_PATH];
    	Dirent *dp;
    	DIR *dfd;
    
    	if ((dfd = opendir(dir)) == NULL) {
    		fprintf(stderr, "dirwalk: can't open %s\n", dir);
    		return;
    	}
    	while ((dp = readdir(dfd)) != NULL) {
    		if (strcmp(dp->name, ".") == 0
    			|| strcmp(dp->name, ".."))
    			continue; 		/* skip self and parent */
    
    		if (strlen(dir)+strlen(dp>name)+2 > sizeof(name))
    			fprintf(stderr, "dirwalk: name %s %s too long\n", dir, dp->name);
    		else {
    			sprintf(name, "%s/%s", dir, dp->name);
    			(*fcn)(name);
    		}
    	}
    	closedir(dfd);
    }
  • Storage Allocator

    The space that malloc manages may not be contiguous. Thus its free storage is kept as a list of free blocks. Each block contains a size, a pointer to the next block, and the space itself. The blocks are kept in order of increasing storage address, and the last block (highest address) points to the first.


    When a request is made, the free list is scanned until a big-enough block is found. This algorithm is called first fit, by contrast with best fit, which looks for the smallest block that will satisfy the request. If the block is exactly the size requested it is unlinked from the list and returned to the user. If the block is too big, it is split, and the proper amount is returned to the user while the residue remains on the free list. If no big-enough block is found, another large chunk is obtained by the operating system and linked into the free list.

    Freeing also causes a search of the free list, to find the proper place to insert the block being freed. If the block being freed is adjacent to a free block on either side, it is coalesced with it into a single bigger block, so storage does not become too fragmented.

    To simplify alignment, all blocks are multiples of the header size, and the header is aligned properly. This is achieved by a union that contains the desired header structure and an instance of the most restrictive alignment type, which we have arbitrarily made a long:


    typedef long Align; /* for alignment to long boundary */
    union header {      /* block header */
    	struct {
    		union header *ptr;  /* next block if on free list */
    		unsigned size;      /* size of this block */
    	} s;
    	Align x;               /* force alignment of blocks */
    };
    typedef union header Header;

    The Align field is never used; it just forces each header to be aligned on a worst-case boundary.


    The variable base is used to get started. If freep is NULL, as it is at the first call of malloc, then a degenerate free list is created; it contains one block of size zero, and points to itself. In any case, the free list is then searched. The search for a free block of adequate size begins at the point (freep) where the last block was found; this strategy helps keep the list homogeneous. If a too-big block is found, the tail end is returned to the user; in this way the header of the original needs only to have its size adjusted. In all cases, the pointer returned to the user points to the free space within the block, which begins one unit beyond the header.


    static Header base ; /*empty list to get started */
    static Header *freep = NULL  /* start of free list */
    
    /* malloc: general purpose storage allocator */
    void *malloc(unsigned nbytes) 
    {
    	Header *p, *prevp;
    	Header *morecore(unsigned);
    	unsigned nunits;
    	
    	nunits = (nbytes + sizeof(Header) - 1)/sizeof(Header) + 1;
    	if ((prevp = freep) == NULL) { /* no free list yet */
    		base.s.ptr = freep = prevp = &base;
    		base.s.size = 0;
    	}
    	
    	for (p = prevp->s.ptr; ; prevp = p; p = p->s.ptr) {
    		if (p->s.size >= nunits) { /*big enough*/
    			if (p->s.size == nunits) { /*exact match */
    				prevp->s.ptr = p->s.ptr;
    			} else {
    				p->s.size -= nunits;
    				p += p->s.size;
    				p->s.size = nunits;
    			}
    			freep = prevp;
    			return (void*) (p+1);
    		}
    		if (p == freep) { /* wrapped around free list */
    			if ((p = morecore(nunits)) == NULL)
    				return NULL;
    		}
    	}
    }
    
    
    #define NALLOC 1024     /* minimum #units to request */
    /* morecore: ask system for more memory */
    static Header *morecore(unsigned nu)
    {
    	char *cp, *sbrk(int);
    	Header *up;
    	
    	if (nu < NALLOC) {
    		nu = NALLOC;
    	
    	cp = sbrk(nu * sizeof(Header));
    	if (cp = (char*) -1) {
    		return NULL;
    	}
    	
    	up = (Header *) cp;
    	up.s.size = nu;
    	free((void*)up+1);
    	return freep;
    }
    
    /* free: put block ap in free list */
    void free(void *ap)
    {
    	Header *bp, *p;
    	bp = (Header *)ap - 1;     /* point to block header */
    	for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr)
    		if (p >= p->s.ptr && (bp > p || bp < p->s.ptr))
    			break; /* freed block at start or end of arena */
    	
    	if (bp + bp->size == p->s.ptr) {   /* join to upper nbr */
    		bp->s.size += p->s.ptr->s.size;
    		bp->s.ptr = p->s.ptr->s.ptr;
    	} else
    		bp->s.ptr = p->s.ptr;
    	if (p + p->size == bp) {  /* join to lower nbr */
    		p->s.size += bp->s.size;
    		p->s.ptr = bp->s.ptr;
    	} else
    		p->s.ptr = bp;
    	freep = p;
    }
    
comments powered by Disqus