| /* Copyright (c) 2018 Google Inc | 
 |  * Barret Rhoden <brho@cs.berkeley.edu> | 
 |  * See LICENSE for details. | 
 |  * | 
 |  * tree_file: structs and helpers to for a tree-based filesystem for 9ns | 
 |  * devices. | 
 |  */ | 
 |  | 
 | #pragma once | 
 |  | 
 | #include <fs_file.h> | 
 | #include <ns.h> | 
 | #include <list.h> | 
 | #include <rcu.h> | 
 |  | 
 | struct tree_file; | 
 | struct tree_filesystem; | 
 |  | 
 | /* The walk cache encapsulates a couple things related to caching, similar to | 
 |  * the dentry cache in the VFS. | 
 |  * - the parent -> child links in a hash table | 
 |  * - the LRU list, consisting of all tree files with kref == 0 | 
 |  * | 
 |  * LRU notes.  Once a TF gets linked into a tree: | 
 |  * - a TF is on the LRU IFF its refcnt == 0 | 
 |  * - a TF is on the LRU only if it is in the tree | 
 |  * - any time kref gets set to 0, we consider putting it on the LRU (if not | 
 |  *   DISCONNECTED / in the tree).  anytime it is increffed from 0, we yank it. | 
 |  * - NEG entries are always kref == 0, and are on the LRU list if they are in | 
 |  *   the tree.  They are never increffed, only rcu-read. | 
 |  */ | 
 | struct walk_cache { | 
 | 	spinlock_t					lru_lock; | 
 | 	struct list_head			lru; | 
 | 	spinlock_t					ht_lock; | 
 | 	struct hash_helper			hh;		/* parts are rcu-read */ | 
 | 	struct hlist_head			*ht; | 
 | 	struct hlist_head			static_ht[HASH_INIT_SZ]; | 
 | }; | 
 |  | 
 | /* All ops that operate on a parent have the parent qlocked. | 
 |  * | 
 |  * free(): the TF is being freed.  Might be called on TFs that the TFS is | 
 |  * unaware of (e.g. lookup threw an error). | 
 |  * | 
 |  * unlink(): child is being unlinked from the parent. | 
 |  * | 
 |  * lookup(): backends fill in the child TF with whatever it knows about the | 
 |  * entry.  This function is optional.  TFSs whose entire tree is in the frontend | 
 |  * (e.g.  ramfs) don't do lookups from the non-existent backend.  This | 
 |  * structure, positive or negative, will be used until it is unlinked. | 
 |  * | 
 |  * create(): given parent and it's initialized child struct, create the file and | 
 |  * do whatever else you need (e.g. 9p backend does the create).  Same as lookup, | 
 |  * the TFS needs to fill in child->dir, except for name.  Note we don't pass | 
 |  * omode.  That's handled in the device's front-end create: open the chan after | 
 |  * creating. | 
 |  * | 
 |  * rename(): rename / move TF from the old_parent to the new_parent, and change | 
 |  * the name to name (NULL == no change).  Note that the rename op does *not* set | 
 |  * the TF's name yet.  Be careful changing any fields in the TF, since it is | 
 |  * still in old_parent's HT. | 
 |  * | 
 |  * has_children(): TFSs that use the front end as a cache (e.g. #mnt) need to | 
 |  * check with their backend to know if there are children or not.  Checking the | 
 |  * children list isn't necessarily sufficient. | 
 |  */ | 
 | struct tree_file_ops { | 
 | 	void (*free)(struct tree_file *tf); | 
 | 	void (*unlink)(struct tree_file *parent, struct tree_file *child); | 
 | 	void (*lookup)(struct tree_file *parent, struct tree_file *child); | 
 | 	void (*create)(struct tree_file *parent, struct tree_file *child, int perm); | 
 | 	void (*rename)(struct tree_file *tf, struct tree_file *old_parent, | 
 | 	               struct tree_file *new_parent, const char *name, int flags); | 
 | 	bool (*has_children)(struct tree_file *parent); | 
 | }; | 
 |  | 
 | struct tree_filesystem { | 
 | 	struct walk_cache			wc; | 
 | 	struct tree_file_ops		tf_ops; | 
 | 	struct fs_file_ops			fs_ops; | 
 | 	qlock_t						rename_mtx; | 
 | 	struct tree_file			*root; | 
 | 	void						*priv; | 
 | }; | 
 |  | 
 | /* The tree_file is an fs_file (i.e. the first struct field) that exists in a | 
 |  * tree filesystem with parent/child relationships. | 
 |  * | 
 |  * Children hold krefs on their parents.  Parents have weak refs (i.e. not a | 
 |  * kref) on their children.  Parent's have a list of *known* children.  The | 
 |  * backend of the FS (e.g. the other side of #mnt) may know of other children. | 
 |  * In this way, the parent's list is a cache.  For devices like a ramfs or any | 
 |  * dynamic, completely tree-based system (e.g. #srv), the tree filesystem is the | 
 |  * authoritative FS; there is no backend. | 
 |  * | 
 |  * If we know of a parent-child relationship (i.e. it's in the tree_filesystem), | 
 |  * then the following invariants are true: | 
 |  * - The child is in the parent's list. | 
 |  * - The child->parent == the parent's tree_file | 
 |  * - There is an entry in the walk cache from {parent,name} -> child_tree_file. | 
 |  * - The backend FS (if there is one) knows about the parent-child link | 
 |  * | 
 |  * To change these, hold the parent's qlock and you must change them all, | 
 |  * with a slight exception: you must *set* child->parent = parent under the | 
 |  * lock.  You may *clear* that reference (and decref the parent) outside the | 
 |  * parent's qlock when freeing the child.  At this point, there are no | 
 |  * references to child, it is off all lists and structures, and RCU readers are | 
 |  * done. | 
 |  * | 
 |  * We'll need to make these changes for create, remove, rename, and lookup (add | 
 |  * entries (positive or negative) that we didn't know about).  Note that even | 
 |  * changing the basename, but not moving, a child during rename requires the | 
 |  * parent's qlock.  (It changes the hashtable linkage). | 
 |  * | 
 |  * Keep in mind that the info in the frontend (tree_files) may be a subset of | 
 |  * the real filesystem, which is the backend (e.g. 9p mount, disk structures, | 
 |  * etc).  The frontend might not have everything from the backend, typically | 
 |  * because of memory reclaim. | 
 |  * | 
 |  * Readers use the walk cache hash table and child->parent) with RCU read | 
 |  * locks.  The linked list of children is only accessed under the parent's | 
 |  * qlock.  If a child needs a ref on its parent, it will need to kref_get during | 
 |  * rcu lock.  Children hold refs on parents, but a concurrent removal/rename | 
 |  * could break that link and decref the parent. | 
 |  * | 
 |  * Synchronization rules | 
 |  * ------------------ | 
 |  * Hold the lifetime spin lock when: | 
 |  * - Changing or relying on the value of flags (e.g., during kcref upping) | 
 |  * - Upping a kref from 0 during a lookup | 
 |  * - Changing membership on LRU lists (e.g. during release or kcref upping) | 
 |  *   The lock ordering is entry -> LRU list.  Note the LRU pruner will lock the | 
 |  *   list first, then *trylock* the entry *and* the parent, skipping on failure. | 
 |  * | 
 |  * Refcounting rules: (subject to a device's whims) | 
 |  * - All walked or attached chans (distinct; those that will be closed in the | 
 |  *   device) have a refcounted tree_file. | 
 |  * - The tree_files do not have to be in a lookup structure (parent's list / | 
 |  *   hash table); they could have been unlinked/removed and waiting for the | 
 |  *   final close. | 
 |  * - tree_files can have 0 references.  Typically, files that are unopened | 
 |  *   (technically, unwalked) will have their kref == 0.  On release, they can | 
 |  *   stay in the tree/lookup system, and they are added to an LRU list. | 
 |  * - negative tree_files (i.e. names for files that don't exist) always have a | 
 |  *   refcnt == 0, and are candidates for being pruned at all times. | 
 |  * | 
 |  * Lock ordering: | 
 |  * - Note the tree uses the qlock in the FS file.  In essence, they are the same | 
 |  *   object (a tree_file is an fs_file).  This qlock is for changing the | 
 |  *   contents of the file, whether that's the parent directories links to its | 
 |  *   children, a file's contents (via write), or the metadata (dir) of either. | 
 |  * - Parent qlock -> hash_table bucketlock | 
 |  * - Parent qlock -> child lifetime spinlock -> LRU list spinlock | 
 |  *   Note the LRU pruner inverts this ordering, and uses trylocks | 
 |  * - Parent qlock -> child qlock (unlink and create/rename). | 
 |  * - Qlocking multiple files that aren't parent->child requires the rename_mtx | 
 |  */ | 
 | struct tree_file { | 
 | 	struct fs_file				file; | 
 | 	spinlock_t					lifetime; | 
 | 	int							flags; | 
 | 	struct kref					kref; | 
 | 	struct rcu_head				rcu; | 
 | 	struct tree_file			*parent;		/* rcu protected */ | 
 | 	struct hlist_node			hash;			/* rcu protected */ | 
 | 	struct list_head			siblings; | 
 | 	struct list_head			children; | 
 | 	struct list_head			lru; | 
 | 	bool						can_have_children; | 
 | 	struct tree_filesystem		*tfs; | 
 | }; | 
 |  | 
 | #define TF_F_DISCONNECTED		(1 << 0) | 
 | #define TF_F_NEGATIVE			(1 << 1) | 
 | #define TF_F_ON_LRU				(1 << 2) | 
 | #define TF_F_IS_ROOT			(1 << 3) | 
 | #define TF_F_HAS_BEEN_USED		(1 << 4) | 
 |  | 
 | /* Devices can put their tree_files / fs_files whereever they want.  For now, | 
 |  * all of them will use aux.  We can make ops for this if we need it. */ | 
 | static inline struct tree_file *chan_to_tree_file(struct chan *c) | 
 | { | 
 | 	return c->aux; | 
 | } | 
 |  | 
 | static inline void chan_set_tree_file(struct chan *c, struct tree_file *tf) | 
 | { | 
 | 	c->aux = tf; | 
 | } | 
 |  | 
 | static inline struct qid tree_file_to_qid(struct tree_file *tf) | 
 | { | 
 | 	return tf->file.dir.qid; | 
 | } | 
 |  | 
 | static inline bool tree_file_is_dir(struct tree_file *tf) | 
 | { | 
 | 	return tree_file_to_qid(tf).type & QTDIR ? true : false; | 
 | } | 
 |  | 
 | static inline bool tree_file_is_file(struct tree_file *tf) | 
 | { | 
 | 	return qid_is_file(tree_file_to_qid(tf)); | 
 | } | 
 |  | 
 | static inline bool tree_file_is_symlink(struct tree_file *tf) | 
 | { | 
 | 	return tree_file_to_qid(tf).type & QTSYMLINK ? true : false; | 
 | } | 
 |  | 
 | static inline bool tree_file_is_negative(struct tree_file *tf) | 
 | { | 
 | 	return tf->flags & TF_F_NEGATIVE ? true : false; | 
 | } | 
 |  | 
 | static inline bool tree_file_is_root(struct tree_file *tf) | 
 | { | 
 | 	return tf == tf->tfs->root; | 
 | } | 
 |  | 
 | static inline char *tree_file_to_name(struct tree_file *tf) | 
 | { | 
 | 	return tf->file.dir.name; | 
 | } | 
 |  | 
 | static inline bool caller_has_tf_perms(struct tree_file *tf, int omode) | 
 | { | 
 | 	return caller_has_file_perms(&tf->file, omode); | 
 | } | 
 |  | 
 | static inline void tree_file_perm_check(struct tree_file *tf, int omode) | 
 | { | 
 | 	fs_file_perm_check(&tf->file, omode); | 
 | } | 
 |  | 
 | /* tree_file helpers */ | 
 | bool tf_kref_get(struct tree_file *tf); | 
 | void tf_kref_put(struct tree_file *tf); | 
 | struct tree_file *tree_file_alloc(struct tree_filesystem *tfs, | 
 |                                   struct tree_file *parent, const char *name); | 
 | struct walkqid *tree_file_walk(struct tree_file *from, char **name, | 
 |                                unsigned int nname); | 
 | struct tree_file *tree_file_create(struct tree_file *parent, const char *name, | 
 |                                    uint32_t perm, char *ext); | 
 | void tree_file_remove(struct tree_file *child); | 
 | void tree_file_rename(struct tree_file *tf, struct tree_file *new_parent, | 
 |                       const char *name, int flags); | 
 | ssize_t tree_file_readdir(struct tree_file *parent, void *ubuf, size_t n, | 
 |                           off64_t offset, int *dri); | 
 |  | 
 | /* tree_file helpers that operate on chans */ | 
 | struct walkqid *tree_chan_walk(struct chan *c, struct chan *nc, char **name, | 
 |                                unsigned int nname); | 
 | void tree_chan_create(struct chan *c, char *name, int omode, uint32_t perm, | 
 |                       char *ext); | 
 | struct chan *tree_chan_open(struct chan *c, int omode); | 
 | void tree_chan_close(struct chan *c); | 
 | void tree_chan_remove(struct chan *c); | 
 | void tree_chan_rename(struct chan *c, struct chan *new_p_c, const char *name, | 
 |                       int flags); | 
 | size_t tree_chan_read(struct chan *c, void *ubuf, size_t n, off64_t offset); | 
 | size_t tree_chan_write(struct chan *c, void *ubuf, size_t n, off64_t offset); | 
 | size_t tree_chan_stat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); | 
 | size_t tree_chan_wstat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); | 
 | struct fs_file *tree_chan_mmap(struct chan *c, struct vm_region *vmr, int prot, | 
 |                                int flags); | 
 |  | 
 | struct chan *tree_file_alloc_chan(struct tree_file *tf, struct dev *dev, | 
 |                                   char *name); | 
 | void tfs_init(struct tree_filesystem *tfs); | 
 | void tfs_destroy(struct tree_filesystem *tfs); | 
 | void tfs_frontend_for_each(struct tree_filesystem *tfs, | 
 |                            void (*cb)(struct tree_file *tf)); | 
 | void tfs_frontend_purge(struct tree_filesystem *tfs, | 
 |                         void (*cb)(struct tree_file *tf)); | 
 | void __tfs_dump(struct tree_filesystem *tfs); | 
 |  | 
 | void tfs_lru_for_each(struct tree_filesystem *tfs, bool cb(struct tree_file *), | 
 |                       size_t max_tfs); | 
 | void tfs_lru_prune_neg(struct tree_filesystem *tfs); |