| /* Copyright (c) 2018 Google Inc |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * tree_file: structs and helpers to for a tree-based filesystem for 9ns |
| * devices. |
| */ |
| |
| #pragma once |
| |
| #include <fs_file.h> |
| #include <ns.h> |
| #include <list.h> |
| #include <rcu.h> |
| |
| struct tree_file; |
| struct tree_filesystem; |
| |
| /* The walk cache encapsulates a couple things related to caching, similar to |
| * the dentry cache in the VFS. |
| * - the parent -> child links in a hash table |
| * - the LRU list, consisting of all tree files with kref == 0 |
| * |
| * LRU notes. Once a TF gets linked into a tree: |
| * - a TF is on the LRU IFF its refcnt == 0 |
| * - a TF is on the LRU only if it is in the tree |
| * - any time kref gets set to 0, we consider putting it on the LRU (if not |
| * DISCONNECTED / in the tree). anytime it is increffed from 0, we yank it. |
| * - NEG entries are always kref == 0, and are on the LRU list if they are in |
| * the tree. They are never increffed, only rcu-read. |
| */ |
| struct walk_cache { |
| spinlock_t lru_lock; |
| struct list_head lru; |
| spinlock_t ht_lock; |
| struct hash_helper hh; /* parts are rcu-read */ |
| struct hlist_head *ht; |
| struct hlist_head static_ht[HASH_INIT_SZ]; |
| }; |
| |
| /* All ops that operate on a parent have the parent qlocked. |
| * |
| * free(): the TF is being freed. Might be called on TFs that the TFS is |
| * unaware of (e.g. lookup threw an error). |
| * |
| * unlink(): child is being unlinked from the parent. |
| * |
| * lookup(): backends fill in the child TF with whatever it knows about the |
| * entry. This function is optional. TFSs whose entire tree is in the frontend |
| * (e.g. ramfs) don't do lookups from the non-existent backend. This |
| * structure, positive or negative, will be used until it is unlinked. |
| * |
| * create(): given parent and it's initialized child struct, create the file and |
| * do whatever else you need (e.g. 9p backend does the create). Same as lookup, |
| * the TFS needs to fill in child->dir, except for name. Note we don't pass |
| * omode. That's handled in the device's front-end create: open the chan after |
| * creating. |
| * |
| * rename(): rename / move TF from the old_parent to the new_parent, and change |
| * the name to name (NULL == no change). Note that the rename op does *not* set |
| * the TF's name yet. Be careful changing any fields in the TF, since it is |
| * still in old_parent's HT. |
| * |
| * has_children(): TFSs that use the front end as a cache (e.g. #mnt) need to |
| * check with their backend to know if there are children or not. Checking the |
| * children list isn't necessarily sufficient. |
| */ |
| struct tree_file_ops { |
| void (*free)(struct tree_file *tf); |
| void (*unlink)(struct tree_file *parent, struct tree_file *child); |
| void (*lookup)(struct tree_file *parent, struct tree_file *child); |
| void (*create)(struct tree_file *parent, struct tree_file *child, |
| int perm); |
| void (*rename)(struct tree_file *tf, struct tree_file *old_parent, |
| struct tree_file *new_parent, const char *name, |
| int flags); |
| bool (*has_children)(struct tree_file *parent); |
| }; |
| |
| struct tree_filesystem { |
| struct walk_cache wc; |
| struct tree_file_ops tf_ops; |
| struct fs_file_ops fs_ops; |
| qlock_t rename_mtx; |
| struct tree_file *root; |
| void *priv; |
| }; |
| |
| /* The tree_file is an fs_file (i.e. the first struct field) that exists in a |
| * tree filesystem with parent/child relationships. |
| * |
| * Children hold krefs on their parents. Parents have weak refs (i.e. not a |
| * kref) on their children. Parent's have a list of *known* children. The |
| * backend of the FS (e.g. the other side of #mnt) may know of other children. |
| * In this way, the parent's list is a cache. For devices like a ramfs or any |
| * dynamic, completely tree-based system (e.g. #srv), the tree filesystem is the |
| * authoritative FS; there is no backend. |
| * |
| * If we know of a parent-child relationship (i.e. it's in the tree_filesystem), |
| * then the following invariants are true: |
| * - The child is in the parent's list. |
| * - The child->parent == the parent's tree_file |
| * - There is an entry in the walk cache from {parent,name} -> child_tree_file. |
| * - The backend FS (if there is one) knows about the parent-child link |
| * |
| * To change these, hold the parent's qlock and you must change them all, |
| * with a slight exception: you must *set* child->parent = parent under the |
| * lock. You may *clear* that reference (and decref the parent) outside the |
| * parent's qlock when freeing the child. At this point, there are no |
| * references to child, it is off all lists and structures, and RCU readers are |
| * done. |
| * |
| * We'll need to make these changes for create, remove, rename, and lookup (add |
| * entries (positive or negative) that we didn't know about). Note that even |
| * changing the basename, but not moving, a child during rename requires the |
| * parent's qlock. (It changes the hashtable linkage). |
| * |
| * Keep in mind that the info in the frontend (tree_files) may be a subset of |
| * the real filesystem, which is the backend (e.g. 9p mount, disk structures, |
| * etc). The frontend might not have everything from the backend, typically |
| * because of memory reclaim. |
| * |
| * Readers use the walk cache hash table and child->parent) with RCU read |
| * locks. The linked list of children is only accessed under the parent's |
| * qlock. If a child needs a ref on its parent, it will need to kref_get during |
| * rcu lock. Children hold refs on parents, but a concurrent removal/rename |
| * could break that link and decref the parent. |
| * |
| * Synchronization rules |
| * ------------------ |
| * Hold the lifetime spin lock when: |
| * - Changing or relying on the value of flags (e.g., during kcref upping) |
| * - Upping a kref from 0 during a lookup |
| * - Changing membership on LRU lists (e.g. during release or kcref upping) |
| * The lock ordering is entry -> LRU list. Note the LRU pruner will lock the |
| * list first, then *trylock* the entry *and* the parent, skipping on failure. |
| * |
| * Refcounting rules: (subject to a device's whims) |
| * - All walked or attached chans (distinct; those that will be closed in the |
| * device) have a refcounted tree_file. |
| * - The tree_files do not have to be in a lookup structure (parent's list / |
| * hash table); they could have been unlinked/removed and waiting for the |
| * final close. |
| * - tree_files can have 0 references. Typically, files that are unopened |
| * (technically, unwalked) will have their kref == 0. On release, they can |
| * stay in the tree/lookup system, and they are added to an LRU list. |
| * - negative tree_files (i.e. names for files that don't exist) always have a |
| * refcnt == 0, and are candidates for being pruned at all times. |
| * |
| * Lock ordering: |
| * - Note the tree uses the qlock in the FS file. In essence, they are the same |
| * object (a tree_file is an fs_file). This qlock is for changing the |
| * contents of the file, whether that's the parent directories links to its |
| * children, a file's contents (via write), or the metadata (dir) of either. |
| * - Parent qlock -> hash_table bucketlock |
| * - Parent qlock -> child lifetime spinlock -> LRU list spinlock |
| * Note the LRU pruner inverts this ordering, and uses trylocks |
| * - Parent qlock -> child qlock (unlink and create/rename). |
| * - Qlocking multiple files that aren't parent->child requires the rename_mtx |
| */ |
| struct tree_file { |
| struct fs_file file; |
| spinlock_t lifetime; |
| int flags; |
| struct kref kref; |
| struct rcu_head rcu; |
| struct tree_file *parent; /* rcu protected */ |
| struct hlist_node hash; /* rcu protected */ |
| struct list_head siblings; |
| struct list_head children; |
| struct list_head lru; |
| bool can_have_children; |
| struct tree_filesystem *tfs; |
| }; |
| |
| #define TF_F_DISCONNECTED (1 << 0) |
| #define TF_F_NEGATIVE (1 << 1) |
| #define TF_F_ON_LRU (1 << 2) |
| #define TF_F_IS_ROOT (1 << 3) |
| #define TF_F_HAS_BEEN_USED (1 << 4) |
| |
| /* Devices can put their tree_files / fs_files whereever they want. For now, |
| * all of them will use aux. We can make ops for this if we need it. */ |
| static inline struct tree_file *chan_to_tree_file(struct chan *c) |
| { |
| return c->aux; |
| } |
| |
| static inline void chan_set_tree_file(struct chan *c, struct tree_file *tf) |
| { |
| c->aux = tf; |
| } |
| |
| static inline struct qid tree_file_to_qid(struct tree_file *tf) |
| { |
| return tf->file.dir.qid; |
| } |
| |
| static inline bool tree_file_is_dir(struct tree_file *tf) |
| { |
| return tree_file_to_qid(tf).type & QTDIR ? true : false; |
| } |
| |
| static inline bool tree_file_is_file(struct tree_file *tf) |
| { |
| return qid_is_file(tree_file_to_qid(tf)); |
| } |
| |
| static inline bool tree_file_is_symlink(struct tree_file *tf) |
| { |
| return tree_file_to_qid(tf).type & QTSYMLINK ? true : false; |
| } |
| |
| static inline bool tree_file_is_negative(struct tree_file *tf) |
| { |
| return tf->flags & TF_F_NEGATIVE ? true : false; |
| } |
| |
| static inline bool tree_file_is_root(struct tree_file *tf) |
| { |
| return tf == tf->tfs->root; |
| } |
| |
| static inline char *tree_file_to_name(struct tree_file *tf) |
| { |
| return tf->file.dir.name; |
| } |
| |
| static inline bool caller_has_tf_perms(struct tree_file *tf, int omode) |
| { |
| return caller_has_file_perms(&tf->file, omode); |
| } |
| |
| static inline void tree_file_perm_check(struct tree_file *tf, int omode) |
| { |
| fs_file_perm_check(&tf->file, omode); |
| } |
| |
| /* tree_file helpers */ |
| bool tf_kref_get(struct tree_file *tf); |
| void tf_kref_put(struct tree_file *tf); |
| struct tree_file *tree_file_alloc(struct tree_filesystem *tfs, |
| struct tree_file *parent, const char *name); |
| struct walkqid *tree_file_walk(struct tree_file *from, char **name, |
| unsigned int nname); |
| struct tree_file *tree_file_create(struct tree_file *parent, const char *name, |
| uint32_t perm, char *ext); |
| void tree_file_remove(struct tree_file *child); |
| void tree_file_rename(struct tree_file *tf, struct tree_file *new_parent, |
| const char *name, int flags); |
| ssize_t tree_file_readdir(struct tree_file *parent, void *ubuf, size_t n, |
| off64_t offset, int *dri); |
| |
| /* tree_file helpers that operate on chans */ |
| struct walkqid *tree_chan_walk(struct chan *c, struct chan *nc, char **name, |
| unsigned int nname); |
| void tree_chan_create(struct chan *c, char *name, int omode, uint32_t perm, |
| char *ext); |
| struct chan *tree_chan_open(struct chan *c, int omode); |
| void tree_chan_close(struct chan *c); |
| void tree_chan_remove(struct chan *c); |
| void tree_chan_rename(struct chan *c, struct chan *new_p_c, const char *name, |
| int flags); |
| size_t tree_chan_read(struct chan *c, void *ubuf, size_t n, off64_t offset); |
| size_t tree_chan_write(struct chan *c, void *ubuf, size_t n, off64_t offset); |
| size_t tree_chan_stat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); |
| size_t tree_chan_wstat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); |
| struct fs_file *tree_chan_mmap(struct chan *c, struct vm_region *vmr, int prot, |
| int flags); |
| unsigned long tree_chan_ctl(struct chan *c, int op, unsigned long a1, |
| unsigned long a2, unsigned long a3, |
| unsigned long a4); |
| |
| struct chan *tree_file_alloc_chan(struct tree_file *tf, struct dev *dev, |
| char *name); |
| void tfs_init(struct tree_filesystem *tfs); |
| void tfs_destroy(struct tree_filesystem *tfs); |
| void tfs_frontend_for_each(struct tree_filesystem *tfs, |
| void (*cb)(struct tree_file *tf)); |
| void tfs_frontend_purge(struct tree_filesystem *tfs, |
| void (*cb)(struct tree_file *tf)); |
| void __tfs_dump(struct tree_filesystem *tfs); |
| void __tfs_dump_tf(struct tree_file *tf); |
| |
| void tfs_lru_for_each(struct tree_filesystem *tfs, bool cb(struct tree_file *), |
| size_t max_tfs); |
| void tfs_lru_prune_neg(struct tree_filesystem *tfs); |