File#
Introduction#
File I/O is fundamental to Unix programming. The Unix philosophy of “everything
is a file” means that the same system calls—open(), read(), write(),
close()—work for regular files, devices, pipes, and sockets. This unified
interface simplifies programming and enables powerful composition of tools.
Unix provides two levels of file I/O: low-level system calls (open, read,
write) that work with file descriptors, and the standard C library’s buffered
I/O (fopen, fread, fwrite) that provides automatic buffering and
portability. Low-level calls offer more control and are necessary for certain
operations like file locking and memory mapping, while buffered I/O is more
convenient for text processing and sequential access.
File Size with lseek#
- Source:
The lseek() system call repositions the file offset for an open file
descriptor. By seeking to the end of the file (SEEK_END) and reading
the resulting offset, you can determine the file size. This technique works
for regular files but not for pipes, sockets, or some device files where
seeking is not supported.
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[]) {
int fd = open(argv[1], O_RDONLY);
if (fd < 0) return 1;
off_t start = lseek(fd, 0, SEEK_SET);
off_t end = lseek(fd, 0, SEEK_END);
printf("File size: %lld bytes\n", (long long)(end - start));
close(fd);
}
$ echo "Hello" > hello.txt
$ ./lseek-size hello.txt
File size: 6 bytes
File Size with fstat#
- Source:
The fstat() system call retrieves metadata about an open file, including
its size, permissions, ownership, and timestamps. Unlike lseek(), fstat()
doesn’t modify the file offset and provides additional information. The stat()
variant works with a pathname instead of a file descriptor.
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
int main(int argc, char *argv[]) {
int fd = open(argv[1], O_RDONLY);
if (fd < 0) return 1;
struct stat st;
fstat(fd, &st);
printf("File size: %lld bytes\n", (long long)st.st_size);
close(fd);
}
$ echo "Hello" > hello.txt
$ ./fstat-size hello.txt
File size: 6 bytes
Copy File Contents#
- Source:
Copying a file involves reading from the source and writing to the destination
in a loop until all data is transferred. The buffer size affects performance:
larger buffers reduce system call overhead but use more memory. This example
also preserves the source file’s permissions using fstat() to read them
and passing them to open() when creating the destination.
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#define BUF_SIZE 4096
int main(int argc, char *argv[]) {
int sfd = open(argv[1], O_RDONLY);
struct stat st;
fstat(sfd, &st);
int dfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, st.st_mode);
char buf[BUF_SIZE];
ssize_t n;
while ((n = read(sfd, buf, BUF_SIZE)) > 0)
write(dfd, buf, n);
close(sfd);
close(dfd);
}
$ echo "Hello World" > source.txt
$ ./copy-file source.txt dest.txt
$ diff source.txt dest.txt
Read Lines from File#
- Source:
The getline() function reads an entire line from a stream, dynamically
allocating or reallocating the buffer as needed. It handles lines of any
length and includes the newline character in the result. The caller must
free the buffer when done. This is the preferred way to read lines in C
as it avoids buffer overflow vulnerabilities present in gets() and
the complexity of fgets() with unknown line lengths.
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
FILE *f = fopen(argv[1], "r");
if (!f) return 1;
char *line = NULL;
size_t len = 0;
while (getline(&line, &len, f) != -1)
printf("%s", line);
free(line);
fclose(f);
}
Read Entire File into Memory#
- Source:
For small to medium files, reading the entire contents into memory simplifies
processing. This pattern uses fseek() and ftell() to determine the file
size, allocates a buffer, then reads everything with a single fread() call.
Be cautious with large files as this approach can exhaust available memory.
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
FILE *f = fopen(argv[1], "r");
if (!f) return 1;
fseek(f, 0, SEEK_END);
long size = ftell(f);
rewind(f);
char *buf = malloc(size + 1);
fread(buf, 1, size, f);
buf[size] = '\0';
printf("%s", buf);
free(buf);
fclose(f);
}
Check File Types#
- Source:
The stat() function returns file metadata including the file type encoded
in the st_mode field. Unix supports several file types: regular files,
directories, symbolic links, block devices, character devices, FIFOs (named
pipes), and sockets. The S_IFMT mask extracts the type bits, and macros
like S_ISREG() and S_ISDIR() provide convenient type checking.
#include <stdio.h>
#include <sys/stat.h>
int main(int argc, char *argv[]) {
struct stat st;
if (stat(argv[1], &st) < 0) return 1;
switch (st.st_mode & S_IFMT) {
case S_IFREG: printf("Regular file\n"); break;
case S_IFDIR: printf("Directory\n"); break;
case S_IFLNK: printf("Symbolic link\n"); break;
case S_IFBLK: printf("Block device\n"); break;
case S_IFCHR: printf("Character device\n"); break;
case S_IFIFO: printf("FIFO/pipe\n"); break;
case S_IFSOCK: printf("Socket\n"); break;
default: printf("Unknown\n");
}
}
$ ./file-type /etc/hosts
Regular file
$ ./file-type /usr
Directory
$ ./file-type /dev/null
Character device
Directory Tree Walk#
- Source:
The nftw() function (new file tree walk) recursively traverses a directory
tree, calling a user-provided callback for each entry. It handles the complexity
of opening directories, reading entries, and managing recursion depth. The
FTW_DEPTH flag processes directory contents before the directory itself
(post-order), useful for operations like recursive deletion. FTW_PHYS
prevents following symbolic links.
#define _GNU_SOURCE
#include <stdio.h>
#include <ftw.h>
int callback(const char *path, const struct stat *sb,
int type, struct FTW *ftwbuf) {
printf("%s\n", path);
return 0;
}
int main(int argc, char *argv[]) {
nftw(argv[1], callback, 64, FTW_DEPTH | FTW_PHYS);
}
$ ./tree-walk .
./main.c
./a.out
.
Pipe to Shell Command with popen#
- Source:
The popen() function creates a pipe to a shell command, returning a
FILE* stream for reading the command’s output or writing to its input.
This simplifies executing external commands and processing their results
without manually managing fork(), exec(), and pipe(). Always
use pclose() to close the stream and retrieve the command’s exit status.
#include <stdio.h>
#include <sys/wait.h>
int main(void) {
FILE *fp = popen("ls -la", "r");
if (!fp) return 1;
char buf[256];
while (fgets(buf, sizeof(buf), fp))
printf("%s", buf);
int status = pclose(fp);
printf("Exit status: %d\n", WEXITSTATUS(status));
}
$ ./popen-cmd
total 32
drwxr-xr-x 3 user user 4096 Jan 6 10:00 .
-rwxr-xr-x 1 user user 8192 Jan 6 10:00 popen-cmd
...
Exit status: 0
Named Pipe (FIFO)#
- Source:
Named pipes (FIFOs) are similar to anonymous pipes but exist as filesystem
entries, allowing communication between unrelated processes. Create a FIFO
with mkfifo() or the mkfifo command. One process opens it for writing
and another for reading. The open blocks until both ends are connected.
FIFOs are useful for simple IPC without the complexity of sockets.
// writer.c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
int main(void) {
mkfifo("/tmp/myfifo", 0666);
int fd = open("/tmp/myfifo", O_WRONLY);
write(fd, "Hello FIFO", 11);
close(fd);
}
// reader.c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
int main(void) {
int fd = open("/tmp/myfifo", O_RDONLY);
char buf[128];
read(fd, buf, sizeof(buf));
printf("Received: %s\n", buf);
close(fd);
unlink("/tmp/myfifo");
}
# Terminal 1
$ ./writer
# Terminal 2
$ ./reader
Received: Hello FIFO