/* blob: 1f8e0ab92dd17167e1789b2a2584bb0accc89ad6 [file] [log] [blame] [edit] */
/* Barret Rhoden <brho@cs.berkeley.edu>
*
* VFS, based on the Linux VFS as described in LKD 2nd Ed (Robert Love) and in
* UTLK (Bovet/Cesati) , which was probably written by Linus. A lot of it was
* changed (reduced) to handle what ROS will need, at least initially.
* Hopefully it'll be similar enough to interface with ext2 and other Linux
* FSs.
*
* struct qstr came directly from Linux.
* Lawyers can sort out the copyrights and whatnot with these interfaces and
* structures. */
#pragma once
#include <ros/common.h>
#include <ros/limits.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <bitmask.h>
#include <kref.h>
#include <time.h>
#include <radix.h>
#include <hashtable.h>
#include <pagemap.h>
#include <blockdev.h>
#include <fdtap.h>
/* Forward declarations only: proc includes vfs, so these types are named here
 * by hand instead of pulling in their headers. */
struct vm_region;
struct page;

/* TODO: placeholder types.  Remove them once the real ones are supported. */
typedef int ino_t;
typedef int kdev_t;
typedef int dev_t;
struct poll_table_struct {
	int x;
};
struct event_poll {
	int x;
};
struct io_writeback {
	int x;
};
/* End of placeholders.  Note ino and off_t are needed in the next include. */
#include <ros/fs.h>
/* Flags that only matter while a file is being created.  They are not saved
 * for later lookup or use; every other flag is viewable via getfl. */
#define O_CREAT_FLAGS (O_TRUNC | O_NOCTTY | O_EXCL | O_CREAT)
/* The flags you can attempt to set via setfl for the VFS. */
#define O_FCNTL_SET_FLAGS \
	(O_NONBLOCK | O_NOATIME | O_DIRECT | O_ASYNC | O_APPEND)
/* Forward declarations of the VFS types, so the list heads and structs below
 * can refer to one another by pointer before the full definitions appear. */
struct super_block;
struct super_operations;
struct dentry;
struct dentry_operations;
struct inode;
struct inode_operations;
struct file;
struct file_operations;
struct fs_type;
struct vfsmount;
struct pipe_inode_info;
/* List def's we need */
/* Each TAILQ_HEAD / SLIST_HEAD line (macros from <sys/queue.h>) declares a
 * list-head struct type with the given name whose elements are of the struct
 * type named by the second argument.  Note dentry and inode get both a tail
 * queue head and a singly-linked list head. */
TAILQ_HEAD(sb_tailq, super_block);
TAILQ_HEAD(dentry_tailq, dentry);
SLIST_HEAD(dentry_slist, dentry);
TAILQ_HEAD(inode_tailq, inode);
SLIST_HEAD(inode_slist, inode);
TAILQ_HEAD(file_tailq, file);
TAILQ_HEAD(io_wb_tailq, io_writeback);
TAILQ_HEAD(event_poll_tailq, event_poll);
TAILQ_HEAD(vfsmount_tailq, vfsmount);
TAILQ_HEAD(fs_type_tailq, fs_type);
/* Linux's quickstring - saves recomputing the hash and length. Note the length
 * is the non-null-terminated length, as you'd get from strlen(). (for now)
 *
 * The struct only holds a pointer to the characters; it does not own storage
 * for them itself. */
struct qstr {
unsigned int hash;	/* saved hash of the name */
unsigned int len;	/* strlen()-style length (terminator not counted) */
char *name;	/* the characters themselves */
};
/* Helpful structure to pass around during lookup operations. At each point,
 * it tracks the answer, the name of the previous, how deep the symlink
 * following has gone, and the symlink pathnames. *dentry and *mnt up the
 * refcnt of those objects too, so whoever 'receives' this will need to decref.
 * This is meant to be pinning only the 'answer' to a path_lookup, and not the
 * intermediate steps. The intermediates get pinned due to the existence of
 * their children in memory. Internally, the VFS will refcnt any item whenever
 * it is in this struct. The last_sym is needed to pin the dentry (and thus the
 * inode and char* storage for the symname) for the duration of a lookup. When
 * you resolve a pathname, you need to keep its string in memory. */
/* NOTE(review): presumably the upper bound for nameidata.depth - confirm where
 * it is enforced. */
#define MAX_SYMLINK_DEPTH 6 // arbitrary.
struct nameidata {
struct dentry *dentry; /* dentry of the obj */
struct vfsmount *mnt; /* its mount pt */
struct qstr last; /* last component in search */
int flags; /* lookup flags */
int last_type; /* type of last component */
unsigned int depth; /* search's symlink depth */
int intent; /* access type for the file */
struct dentry *last_sym; /* pins the symname */
};
/* Lookup flags for a nameidata (its 'flags' field).  Each one is its own bit. */
#define LOOKUP_FOLLOW		(1 << 0) /* follow the last component if a symlink */
#define LOOKUP_DIRECTORY	(1 << 1) /* the last component must be a directory */
#define LOOKUP_CONTINUE		(1 << 2) /* more filenames remain */
#define LOOKUP_PARENT		(1 << 3) /* look up the dir that holds the item */
/* Values for a nameidata's 'intent'.  Unlike the flags above, these are not
 * disjoint bits: 0x11 and 0x12 both contain 0x10. */
#define LOOKUP_OPEN		0x10 /* the intent is to open a file */
#define LOOKUP_CREATE		0x11 /* create the file if it doesn't exist */
#define LOOKUP_ACCESS		0x12 /* access / check user permissions */
/* Superblock: Specific instance of a mounted filesystem. All synchronization
 * is done with the one spinlock.
 *
 * NOTE(review): besides s_lock, the struct carries separate s_lru_lock,
 * s_dcache_lock, and s_icache_lock fields, so "the one spinlock" presumably
 * means s_lock for everything that has no lock of its own - confirm.
 *
 * A super_block can sit on two tail queues: s_list (all sbs) and s_instances
 * (the sbs of one fs type). */
struct super_block {
TAILQ_ENTRY(super_block) s_list; /* list of all sbs */
dev_t s_dev; /* id */
unsigned long s_blocksize;
bool s_dirty;
unsigned long long s_maxbytes; /* max file size */
struct fs_type *s_type;
struct super_operations *s_op;
unsigned long s_flags;
unsigned long s_magic;
struct vfsmount *s_mount; /* vfsmount point */
spinlock_t s_lock; /* used for all sync */
struct kref s_kref;
bool s_syncing; /* currently syncing metadata */
struct inode_tailq s_inodes; /* all inodes */
struct inode_tailq s_dirty_i; /* dirty inodes */
struct io_wb_tailq s_io_wb; /* writebacks */
struct file_tailq s_files; /* assigned files */
struct dentry_tailq s_lru_d; /* unused dentries (in dcache)*/
spinlock_t s_lru_lock; /* presumably guards s_lru_d - confirm */
struct hashtable *s_dcache; /* dentry cache */
spinlock_t s_dcache_lock; /* presumably guards s_dcache - confirm */
struct hashtable *s_icache; /* inode cache */
spinlock_t s_icache_lock; /* presumably guards s_icache - confirm */
struct block_device *s_bdev;
TAILQ_ENTRY(super_block) s_instances; /* list of sbs of this fs type*/
char s_name[32];
void *s_fs_info; /* opaque pointer; presumably FS-private data */
};
/* Table of function pointers reached through super_block.s_op.  Most members
 * act on an inode; the last five take the super_block itself.  The prototypes
 * do not name their parameters, so the meaning of the bool / int / char *
 * arguments has to be checked against the implementations. */
struct super_operations {
struct inode *(*alloc_inode) (struct super_block *sb);
void (*dealloc_inode) (struct inode *);
void (*read_inode) (struct inode *);
void (*dirty_inode) (struct inode *);
void (*write_inode) (struct inode *, bool);
void (*put_inode) (struct inode *); /* when decreffed */
void (*drop_inode) (struct inode *); /* when about to destroy */
void (*delete_inode) (struct inode *); /* deleted from disk */
void (*put_super) (struct super_block *); /* releases sb */
void (*write_super) (struct super_block *); /* sync with sb on disk */
int (*sync_fs) (struct super_block *, bool);
int (*remount_fs) (struct super_block *, int, char *);
void (*umount_begin) (struct super_block *);/* called by NFS */
};
/* Replaces the file-type bits of 'mode' (the ones covered by __S_IFMT, IAW the
 * bits in ros/fs.h) with 'type' and leaves every other bit alone.  'mode' must
 * be an lvalue; it is named twice in the expansion. */
#define SET_FTYPE(mode, type) ((mode) = (type) | ((mode) & ~__S_IFMT))
/* Inode state flags.  A bunch more will be needed. TBD */
#define I_STATE_DIRTY (1 << 0)
/* Inode: represents a specific file
 *
 * Linkage: i_hash (hash table), i_sb_list (all inodes in the FS), and i_list
 * (state list); i_dentry heads the list of dentries pointing at this inode.
 * The anonymous union holds exactly one of i_pipe / i_bdev / i_cdev; which
 * member is valid is not recorded in the union itself (presumably it follows
 * from the file type in i_mode - confirm). */
struct inode {
SLIST_ENTRY(inode) i_hash; /* inclusion in a hash table */
TAILQ_ENTRY(inode) i_sb_list; /* all inodes in the FS */
TAILQ_ENTRY(inode) i_list; /* describes state (dirty) */
struct dentry_tailq i_dentry; /* all dentries pointing here*/
unsigned long i_ino;
struct kref i_kref;
int i_mode; /* access mode and file type */
unsigned int i_nlink; /* hard links */
uid_t i_uid;
gid_t i_gid;
kdev_t i_rdev; /* real device node */
size_t i_size;
unsigned long i_blksize;
unsigned long i_blocks; /* filesize in blocks */
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
spinlock_t i_lock;
struct inode_operations *i_op;
struct file_operations *i_fop;
struct super_block *i_sb;
struct page_map *i_mapping; /* usually points to i_pm */
struct page_map i_pm; /* this inode's page cache */
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct char_device *i_cdev;
};
unsigned long i_state; /* presumably I_STATE_* bits - confirm */
unsigned long dirtied_when; /* in jiffies */
unsigned int i_flags; /* filesystem mount flags */
bool i_socket;
atomic_t i_writecount; /* number of writers */
void *i_fs_info; /* opaque pointer; presumably FS-private data */
};
/* Table of function pointers reached through inode.i_op.  The prototypes do
 * not name their parameters; where an inode and a dentry are both passed, the
 * inode is presumably the directory and the dentry the entry being acted on
 * (compare the create_file() / create_dir() prototypes in this header) -
 * confirm against the implementations. */
struct inode_operations {
int (*create) (struct inode *, struct dentry *, int, struct nameidata *);
struct dentry *(*lookup) (struct inode *, struct dentry *,
struct nameidata *);
int (*link) (struct dentry *, struct inode *, struct dentry *);
int (*unlink) (struct inode *, struct dentry *);
int (*symlink) (struct inode *, struct dentry *, const char *);
int (*mkdir) (struct inode *, struct dentry *, int);
int (*rmdir) (struct inode *, struct dentry *);
int (*mknod) (struct inode *, struct dentry *, int, dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
char *(*readlink) (struct dentry *);
void (*truncate) (struct inode *); /* set i_size before calling */
int (*permission) (struct inode *, int, struct nameidata *);
};
/* Size, in bytes, of the name array embedded in each dentry (d_iname). */
#define DNAME_INLINE_LEN 32
/* Dentry flags, one bit apiece.  All negatives are also unused. */
#define DENTRY_USED		(1 << 0) /* kref is > 0 */
#define DENTRY_NEGATIVE		(1 << 1) /* caches a failed lookup */
#define DENTRY_DYING		(1 << 2) /* free it on release */
/* Dentry: in memory object, corresponding to an element of a path. E.g. /,
 * usr, bin, and vim are all dentries. All have inodes. Vim happens to be a
 * file instead of a directory.
 * They can be used (valid inode, currently in use), unused (valid, not used),
 * or negative (not a valid inode (deleted or bad path), but kept to resolve
 * requests quickly). If none of these, dealloc it back to the slab cache.
 * Unused and negatives go in the LRU list. */
struct dentry {
struct kref d_kref; /* don't discard when 0 */
unsigned long d_flags; /* dentry cache flags */
spinlock_t d_lock;
struct inode *d_inode;
TAILQ_ENTRY(dentry) d_lru; /* unused list */
TAILQ_ENTRY(dentry) d_alias; /* linkage for i_dentry */
struct dentry_tailq d_subdirs; /* head; presumably of child dentries - confirm */
TAILQ_ENTRY(dentry) d_subdirs_link; /* presumably linkage on the parent's d_subdirs */
unsigned long d_time; /* revalidate time (jiffies)*/
struct dentry_operations *d_op;
struct super_block *d_sb;
bool d_mount_point; /* is an FS mounted over here */
struct vfsmount *d_mounted_fs; /* fs mounted here */
struct dentry *d_parent; /* equals the dentry itself for the root (DENTRY_IS_ROOT) */
struct qstr d_name; /* pts to iname and holds hash*/
char d_iname[DNAME_INLINE_LEN];
void *d_fs_info; /* opaque pointer; presumably FS-private data */
};
/* Checks whether a struct dentry pointer is the root: evaluates to nonzero when
 * 'd' is its own d_parent.  'd' appears twice in the expansion, so do not pass
 * an expression with side effects. */
#define DENTRY_IS_ROOT(d) ((d)->d_parent == (d))
/* Table of function pointers reached through dentry.d_op.
 *
 * not sure yet if we want to call delete when refcnt == 0 (move it to LRU) or
 * when its time to remove it from the dcache. */
struct dentry_operations {
int (*d_revalidate) (struct dentry *, struct nameidata *);
int (*d_hash) (struct dentry *, struct qstr *); /* same signature as generic_dentry_hash() */
int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
int (*d_delete) (struct dentry *);
int (*d_release) (struct dentry *);
void (*d_iput) (struct dentry *, struct inode *);
};
/* Yanked from glibc-2.11.1/posix/unistd.h */
/* Values for the 'whence' style int argument of a seek.  NOTE(review):
 * presumably what file_operations.llseek receives as its last argument -
 * confirm. */
#define SEEK_SET 0 /* Seek from beginning of file. */
#define SEEK_CUR 1 /* Seek from current position. */
#define SEEK_END 2 /* Seek from end of file. */
/* File: represents a file opened by a process.
 *
 * Note there are two reference counts and two locks in here: f_kref, plus the
 * plain refcnt / lock pair at the bottom that belongs to the appserver
 * support. */
struct file {
TAILQ_ENTRY(file) f_list; /* list of all files */
struct dentry *f_dentry; /* definitely not inode. =( */
struct vfsmount *f_vfsmnt;
struct file_operations *f_op;
struct kref f_kref;
unsigned int f_flags; /* O_APPEND, etc */
int f_mode; /* O_RDONLY, etc */
off64_t f_pos; /* offset / file pointer */
unsigned int f_uid;
unsigned int f_gid;
int f_error;
struct event_poll_tailq f_ep_links;
spinlock_t f_ep_lock; /* presumably guards f_ep_links - confirm */
void *f_privdata; /* tty/socket driver hook */
struct page_map *f_mapping; /* page cache mapping */
/* Ghetto appserver support */
int fd; // all it contains is an appserver fd (for pid 0, aka kernel)
int refcnt;
spinlock_t lock;
};
/* Table of function pointers reached through file.f_op (and inode.i_fop).
 * read and write have the same signatures as generic_file_read() and
 * generic_file_write() declared later in this header.  Parameters are mostly
 * unnamed; NOTE(review): the trailing off64_t * on read/write/readv/writev is
 * presumably the file offset to use and update - confirm. */
struct file_operations {
int (*llseek) (struct file *, off64_t, off64_t *, int);
ssize_t (*read) (struct file *, char *, size_t, off64_t *);
ssize_t (*write) (struct file *, const char *, size_t, off64_t *);
int (*readdir) (struct file *, struct dirent *);
int (*mmap) (struct file *, struct vm_region *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, struct dentry *, int);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
off64_t *);
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
off64_t *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, off64_t,
int);
int (*check_flags) (int flags); /* most FS's ignore this */
};
/* FS structs. One of these per FS (e.g., ext2)
 *
 * get_sb and kill_sb are the per-FS hooks for producing and tearing down a
 * super_block.  NOTE(review): get_sb's int and char * arguments are unnamed;
 * presumably mount flags and a device name (compare mount_fs()) - confirm. */
struct fs_type {
const char *name;
int fs_flags;
struct super_block *(*get_sb) (struct fs_type *, int,
char *, struct vfsmount *);
void (*kill_sb) (struct super_block *);
TAILQ_ENTRY(fs_type) list; /* presumably linkage on file_systems - confirm */
struct sb_tailq fs_supers; /* all of this FS's sbs */
};
/* A mount point: more focused on the mounting, and solely in memory, compared
 * to the SB which is focused on FS definitions (and exists on disc).
 *
 * Mounts form a tree: mnt_parent points up, mnt_child_mounts heads the list of
 * mounts below this one, and mnt_child_link is presumably this mount's entry
 * on its parent's mnt_child_mounts - confirm. */
struct vfsmount {
TAILQ_ENTRY(vfsmount) mnt_list; /* presumably linkage on namespace.vfsmounts */
struct vfsmount *mnt_parent;
struct dentry *mnt_mountpoint;/* parent dentry where mnted */
struct dentry *mnt_root; /* dentry of root of this fs */
struct super_block *mnt_sb;
struct vfsmount_tailq mnt_child_mounts;
TAILQ_ENTRY(vfsmount) mnt_child_link;
struct kref mnt_kref;
int mnt_flags;
char *mnt_devname;
struct namespace *mnt_namespace;
};
/* Pipe state, reached from an inode through its i_pipe union member.
 * NOTE(review): the fields look like a byte buffer with separate read and
 * write offsets plus reader/writer counts; the buffer size and how the offsets
 * wrap are not visible in this header - confirm against the pipe code. */
struct pipe_inode_info
{
char *p_buf;
size_t p_rd_off;
size_t p_wr_off;
unsigned int p_nr_readers;
unsigned int p_nr_writers;
struct cond_var p_cv;
};
/* Per-process structs */
/* Sizes of the arrays embedded in struct fd_table: NR_OPEN_FILES_DEFAULT sizes
 * fd_array, NR_FILE_DESC_DEFAULT sizes the small fd bitmask below. */
#define NR_OPEN_FILES_DEFAULT 32
#define NR_FILE_DESC_DEFAULT 32
/* Bitmask for file descriptors, big for when we exceed the initial small. We
 * could just use the fd_array to check for openness instead of the bitmask,
 * but eventually we might want to use the bitmasks for other things (like
 * which files are close_on_exec). */
typedef struct fd_set {
uint8_t fds_bits[BYTES_FOR_BITMASK(NR_FILE_DESC_MAX)];
} fd_set;
/* Same layout idea as fd_set, but sized for only NR_FILE_DESC_DEFAULT fds.
 * Both types expose the same fds_bits member name, which is what the FD_*
 * macros access. */
struct small_fd_set {
uint8_t fds_bits[BYTES_FOR_BITMASK(NR_FILE_DESC_DEFAULT)];
};
/* Helper macros to manage fd_sets.  'p' points to any struct with a uint8_t
 * fds_bits[] member (struct fd_set or struct small_fd_set); fd 'n' lives in bit
 * (n & 7) of byte n / 8.  None of these range-check 'n' against the size of
 * the set, and FD_SET / FD_CLR / FD_ISSET name 'n' twice in their expansion,
 * so pass an in-range value without side effects. */
#define FD_SET(n, p) ((p)->fds_bits[(n)/8] |= (1 << ((n) & 7)))
#define FD_CLR(n, p) ((p)->fds_bits[(n)/8] &= ~(1 << ((n) & 7)))
/* Yields exactly 1 when fd 'n' is set and 0 otherwise, rather than the raw bit
 * mask (e.g. 0x80), so the result can be compared with true/1 or stored in a
 * narrow field without surprises. */
#define FD_ISSET(n,p) (!!((p)->fds_bits[(n)/8] & (1 << ((n) & 7))))
/* Clears every bit of the set 'p' points to (the size comes from *p's type). */
#define FD_ZERO(p) memset((void*)(p),0,sizeof(*(p)))
/* Describes an open file. We need this, since the FD flags are supposed to be
 * per file descriptor, not per file (like the file status flags).
 *
 * An entry can refer to a VFS file (fd_file) or a 9ns chan (fd_chan).
 * NOTE(review): presumably only one of the two is non-NULL for a given fd
 * (compare the 'vfs' argument of lookup_fd() / insert_obj_fdt()) - confirm. */
struct chan; /* from 9ns */
struct file_desc {
struct file *fd_file;
struct chan *fd_chan;
unsigned int fd_flags; /* per-descriptor flags, as opposed to file.f_flags */
struct fd_tap *fd_tap;
};
/* All open files for a process
 *
 * The table embeds small default storage (fd_array and open_fds_init) and
 * keeps pointers (fd and open_fds) that start out aimed at that storage.
 * NOTE(review): open_fds is typed struct fd_set * but initially points at
 * open_fds_init, which is the smaller struct small_fd_set; max_fdset is
 * presumably what keeps accesses within the storage actually present -
 * confirm. */
struct fd_table {
spinlock_t lock;
bool closed;
int max_files; /* max files ptd to by fd */
int max_fdset; /* max of the current fd_set */
int hint_min_fd; /* <= min available fd */
struct file_desc *fd; /* initially pts to fd_array */
struct fd_set *open_fds; /* init, pts to open_fds_init */
struct small_fd_set open_fds_init;
struct file_desc fd_array[NR_OPEN_FILES_DEFAULT];
};
/* Process specific filesystem info
 *
 * This is the type do_chdir(), do_fchdir() and do_getcwd() take as 'fs_env'.
 * NOTE(review): root and pwd are presumably the process's root and current
 * working directory dentries, and presumably hold references - confirm. */
struct fs_struct {
spinlock_t lock;
int umask;
struct dentry *root;
struct dentry *pwd;
};
/* Each process can have its own (eventually), but default to the same NS
 * (see the extern default_ns declared in this header). */
struct namespace {
struct kref kref;
spinlock_t lock;
struct vfsmount *root; /* presumably the mount at the top of this ns - confirm */
struct vfsmount_tailq vfsmounts; /* all vfsmounts in this ns */
};
/* Global Structs */
/* Declarations only; the definitions live outside this header. */
extern struct sb_tailq super_blocks; /* list of all sbs */
extern spinlock_t super_blocks_lock; /* presumably guards super_blocks - confirm */
extern struct fs_type_tailq file_systems; /* lock this if it's dynamic */
extern struct namespace default_ns;
/* Slab caches for common objects */
extern struct kmem_cache *dentry_kcache;
extern struct kmem_cache *inode_kcache;
extern struct kmem_cache *file_kcache;
/* Misc VFS functions */
/* Prototypes only.  Paths and names are taken as non-const char *; whether the
 * callee modifies the string is not visible from this header. */
void vfs_init(void);
void qstr_builder(struct dentry *dentry, char *l_name);
char *file_name(struct file *file);
/* 'path' / 'max_size' describe a caller-supplied buffer.  NOTE(review): the
 * returned pointer presumably points into that buffer - confirm. */
char *dentry_path(struct dentry *dentry, char *path, size_t max_size);
/* The result comes back through *nd; 'flags' presumably takes the LOOKUP_*
 * flags defined in this header.  NOTE(review): per the nameidata comment the
 * nd pins its dentry and mnt, so path_release() is presumably the matching
 * decref - confirm. */
int path_lookup(char *path, int flags, struct nameidata *nd);
void path_release(struct nameidata *nd);
int mount_fs(struct fs_type *fs, char *dev_name, char *path, int flags);
/* Convenience wrapper around dentry_path() for an open file: hands f's dentry
 * to dentry_path() together with the caller's buffer 'path' and its size
 * 'max_size', and returns whatever dentry_path() returns.  'f' is dereferenced
 * unconditionally. */
static inline char *file_abs_path(struct file *f, char *path, size_t max_size)
{
	struct dentry *backing = f->f_dentry;

	return dentry_path(backing, path, max_size);
}
/* Superblock functions */
/* get_sb() takes no arguments and returns a super_block pointer; init_sb()
 * takes that sb plus its mount, the dentry ops, a root inode number, and an
 * opaque d_fs_info pointer.  NOTE(review): presumably get_sb() allocates and
 * init_sb() fills in the sb and its root dentry - confirm. */
struct super_block *get_sb(void);
void init_sb(struct super_block *sb, struct vfsmount *vmnt,
struct dentry_operations *d_op, unsigned long root_ino,
void *d_fs_info);
/* Dentry Functions */
/* get_dentry() is the same as get_dentry_with_ops() minus the d_op argument.
 * NOTE(review): which dentry_operations it uses instead is not visible here. */
struct dentry *get_dentry_with_ops(struct super_block *sb,
struct dentry *parent, char *name,
struct dentry_operations *d_op);
struct dentry *get_dentry(struct super_block *sb, struct dentry *parent,
char *name);
/* Takes a struct kref *, i.e. the signature of a kref release callback;
 * presumably invoked on dentry.d_kref - confirm. */
void dentry_release(struct kref *kref);
void __dentry_free(struct dentry *dentry);
struct dentry *lookup_dentry(char *path, int flags);
/* dcache helpers: each works on one super_block's dentry cache (s_dcache) and
 * uses a dentry as the key.  NOTE(review): reference-count effects on the
 * returned dentries are not visible here - confirm before use. */
struct dentry *dcache_get(struct super_block *sb, struct dentry *what_i_want);
void dcache_put(struct super_block *sb, struct dentry *key_val);
struct dentry *dcache_remove(struct super_block *sb, struct dentry *key);
void dcache_prune(struct super_block *sb, bool negative_only);
/* Matches the signature of dentry_operations.d_hash. */
int generic_dentry_hash(struct dentry *dentry, struct qstr *qstr);
/* Inode Functions */
struct inode *get_inode(struct dentry *dentry);
void load_inode(struct dentry *dentry, unsigned long ino);
/* The create_* helpers take the directory inode 'dir' and the dentry for the
 * new entry; 'mode' is an int (compare inode.i_mode). */
int create_file(struct inode *dir, struct dentry *dentry, int mode);
int create_dir(struct inode *dir, struct dentry *dentry, int mode);
int create_symlink(struct inode *dir, struct dentry *dentry,
const char *symname, int mode);
int check_perms(struct inode *inode, int access_mode);
/* Takes a struct kref *, i.e. the signature of a kref release callback;
 * presumably invoked on inode.i_kref - confirm. */
void inode_release(struct kref *kref);
/* Writes into *kstat; struct kstat is declared outside this header. */
void stat_inode(struct inode *inode, struct kstat *kstat);
/* icache helpers: each works on one super_block's inode cache (s_icache), keyed
 * by inode number.  NOTE(review): reference-count effects are not visible
 * here - confirm before use. */
struct inode *icache_get(struct super_block *sb, unsigned long ino);
void icache_put(struct super_block *sb, struct inode *inode);
struct inode *icache_remove(struct super_block *sb, unsigned long ino);
/* File-ish functions */
/* generic_file_read() and generic_file_write() have the signatures of
 * file_operations.read and .write, so they can be installed in an f_op table
 * directly. */
ssize_t generic_file_read(struct file *file, char *buf, size_t count,
off64_t *offset);
ssize_t generic_file_write(struct file *file, const char *buf, size_t count,
off64_t *offset);
/* Same signature as file_operations.read; the buffer parameter is named u_buf,
 * presumably because it is a user-space buffer - confirm. */
ssize_t generic_dir_read(struct file *file, char *u_buf, size_t count,
off64_t *offset);
struct file *alloc_file(void);
/* The do_* functions are path-based (or file-based) entry points returning an
 * int, except do_file_open() which returns the opened file.  NOTE(review): the
 * error convention (NULL / negative / errno) is not visible here - confirm. */
struct file *do_file_open(char *path, int flags, int mode);
int do_symlink(char *path, const char *symname, int mode);
int do_link(char *old_path, char *new_path);
int do_unlink(char *path);
int do_access(char *path, int mode);
int do_file_chmod(struct file *file, int mode);
int do_mkdir(char *path, int mode);
int do_rmdir(char *path);
/* pipe_files is a struct file ** out-parameter; presumably an array of two
 * (read end, write end) - confirm the order. */
int do_pipe(struct file **pipe_files, int flags);
int do_rename(char *old_path, char *new_path);
int do_truncate(struct inode *inode, off64_t len);
struct file *dentry_open(struct dentry *dentry, int flags);
/* Takes a struct kref *, i.e. the signature of a kref release callback;
 * presumably invoked on file.f_kref - confirm. */
void file_release(struct kref *kref);
ssize_t kread_file(struct file *file, void *buf, size_t sz);
/* Returns a void * with no length; NOTE(review): presumably a kernel
 * allocation holding the file contents that the caller must free - confirm. */
void *kread_whole_file(struct file *file);
/* Process-related File management functions */
/* lookup_fd() and insert_obj_fdt() pass the object as void *.  NOTE(review):
 * the 'vfs' flag presumably selects whether the object is a struct file or a
 * struct chan (the two pointers in struct file_desc) - confirm. */
void *lookup_fd(struct fd_table *fdt, int fd, bool incref, bool vfs);
int insert_obj_fdt(struct fd_table *fdt, void *obj, int low_fd, int fd_flags,
bool must_use_low, bool vfs);
bool close_fd(struct fd_table *fdt, int fd);
void close_fdt(struct fd_table *open_files, bool cloexec);
void clone_fdt(struct fd_table *src, struct fd_table *dst);
/* struct file flavored versions of the fd-table operations above. */
struct file *get_file_from_fd(struct fd_table *open_files, int fd);
void put_file_from_fd(struct fd_table *open_files, int file_desc);
int insert_file(struct fd_table *open_files, struct file *file, int low_fd,
bool must, bool cloexec);
int do_chdir(struct fs_struct *fs_env, char *path);
int do_fchdir(struct fs_struct *fs_env, struct file *file);
/* NOTE(review): judging by its name, *kfree_this is presumably set to a buffer
 * the caller must kfree, with the returned pointer pointing into it - confirm
 * against the implementation. */
char *do_getcwd(struct fs_struct *fs_env, char **kfree_this, size_t cwd_l);
/* Debugging */
void print_kstat(struct kstat *kstat);
int ls_dash_r(char *path);
/* Ops tables defined outside this header.  NOTE(review): presumably
 * placeholder tables for objects without FS-specific ops - confirm. */
extern struct inode_operations dummy_i_op;
extern struct dentry_operations dummy_d_op;