|  | /* Copyright (c) 2018 Google Inc | 
|  | * Barret Rhoden <brho@cs.berkeley.edu> | 
|  | * See LICENSE for details. | 
|  | * | 
|  | * tree_file: structs and helpers to for a tree-based filesystem for 9ns | 
|  | * devices. | 
|  | */ | 
|  |  | 
|  | #pragma once | 
|  |  | 
|  | #include <fs_file.h> | 
|  | #include <ns.h> | 
|  | #include <list.h> | 
|  | #include <rcu.h> | 
|  |  | 
|  | struct tree_file; | 
|  | struct tree_filesystem; | 
|  |  | 
|  | /* The walk cache encapsulates a couple things related to caching, similar to | 
|  | * the dentry cache in the VFS. | 
|  | * - the parent -> child links in a hash table | 
|  | * - the LRU list, consisting of all tree files with kref == 0 | 
|  | * | 
|  | * LRU notes.  Once a TF gets linked into a tree: | 
|  | * - a TF is on the LRU IFF its refcnt == 0 | 
|  | * - a TF is on the LRU only if it is in the tree | 
|  | * - any time kref gets set to 0, we consider putting it on the LRU (if not | 
|  | *   DISCONNECTED / in the tree).  anytime it is increffed from 0, we yank it. | 
|  | * - NEG entries are always kref == 0, and are on the LRU list if they are in | 
|  | *   the tree.  They are never increffed, only rcu-read. | 
|  | */ | 
|  | struct walk_cache { | 
|  | spinlock_t			lru_lock; | 
|  | struct list_head		lru; | 
|  | spinlock_t			ht_lock; | 
|  | struct hash_helper		hh;		/* parts are rcu-read */ | 
|  | struct hlist_head		*ht; | 
|  | struct hlist_head		static_ht[HASH_INIT_SZ]; | 
|  | }; | 
|  |  | 
|  | /* All ops that operate on a parent have the parent qlocked. | 
|  | * | 
|  | * free(): the TF is being freed.  Might be called on TFs that the TFS is | 
|  | * unaware of (e.g. lookup threw an error). | 
|  | * | 
|  | * unlink(): child is being unlinked from the parent. | 
|  | * | 
|  | * lookup(): backends fill in the child TF with whatever it knows about the | 
|  | * entry.  This function is optional.  TFSs whose entire tree is in the frontend | 
|  | * (e.g.  ramfs) don't do lookups from the non-existent backend.  This | 
|  | * structure, positive or negative, will be used until it is unlinked. | 
|  | * | 
|  | * create(): given parent and it's initialized child struct, create the file and | 
|  | * do whatever else you need (e.g. 9p backend does the create).  Same as lookup, | 
|  | * the TFS needs to fill in child->dir, except for name.  Note we don't pass | 
|  | * omode.  That's handled in the device's front-end create: open the chan after | 
|  | * creating. | 
|  | * | 
|  | * rename(): rename / move TF from the old_parent to the new_parent, and change | 
|  | * the name to name (NULL == no change).  Note that the rename op does *not* set | 
|  | * the TF's name yet.  Be careful changing any fields in the TF, since it is | 
|  | * still in old_parent's HT. | 
|  | * | 
|  | * has_children(): TFSs that use the front end as a cache (e.g. #mnt) need to | 
|  | * check with their backend to know if there are children or not.  Checking the | 
|  | * children list isn't necessarily sufficient. | 
|  | */ | 
|  | struct tree_file_ops { | 
|  | void (*free)(struct tree_file *tf); | 
|  | void (*unlink)(struct tree_file *parent, struct tree_file *child); | 
|  | void (*lookup)(struct tree_file *parent, struct tree_file *child); | 
|  | void (*create)(struct tree_file *parent, struct tree_file *child, | 
|  | int perm); | 
|  | void (*rename)(struct tree_file *tf, struct tree_file *old_parent, | 
|  | struct tree_file *new_parent, const char *name, | 
|  | int flags); | 
|  | bool (*has_children)(struct tree_file *parent); | 
|  | }; | 
|  |  | 
|  | struct tree_filesystem { | 
|  | struct walk_cache		wc; | 
|  | struct tree_file_ops		tf_ops; | 
|  | struct fs_file_ops		fs_ops; | 
|  | qlock_t				rename_mtx; | 
|  | struct tree_file		*root; | 
|  | void				*priv; | 
|  | }; | 
|  |  | 
|  | /* The tree_file is an fs_file (i.e. the first struct field) that exists in a | 
|  | * tree filesystem with parent/child relationships. | 
|  | * | 
|  | * Children hold krefs on their parents.  Parents have weak refs (i.e. not a | 
|  | * kref) on their children.  Parent's have a list of *known* children.  The | 
|  | * backend of the FS (e.g. the other side of #mnt) may know of other children. | 
|  | * In this way, the parent's list is a cache.  For devices like a ramfs or any | 
|  | * dynamic, completely tree-based system (e.g. #srv), the tree filesystem is the | 
|  | * authoritative FS; there is no backend. | 
|  | * | 
|  | * If we know of a parent-child relationship (i.e. it's in the tree_filesystem), | 
|  | * then the following invariants are true: | 
|  | * - The child is in the parent's list. | 
|  | * - The child->parent == the parent's tree_file | 
|  | * - There is an entry in the walk cache from {parent,name} -> child_tree_file. | 
|  | * - The backend FS (if there is one) knows about the parent-child link | 
|  | * | 
|  | * To change these, hold the parent's qlock and you must change them all, | 
|  | * with a slight exception: you must *set* child->parent = parent under the | 
|  | * lock.  You may *clear* that reference (and decref the parent) outside the | 
|  | * parent's qlock when freeing the child.  At this point, there are no | 
|  | * references to child, it is off all lists and structures, and RCU readers are | 
|  | * done. | 
|  | * | 
|  | * We'll need to make these changes for create, remove, rename, and lookup (add | 
|  | * entries (positive or negative) that we didn't know about).  Note that even | 
|  | * changing the basename, but not moving, a child during rename requires the | 
|  | * parent's qlock.  (It changes the hashtable linkage). | 
|  | * | 
|  | * Keep in mind that the info in the frontend (tree_files) may be a subset of | 
|  | * the real filesystem, which is the backend (e.g. 9p mount, disk structures, | 
|  | * etc).  The frontend might not have everything from the backend, typically | 
|  | * because of memory reclaim. | 
|  | * | 
|  | * Readers use the walk cache hash table and child->parent) with RCU read | 
|  | * locks.  The linked list of children is only accessed under the parent's | 
|  | * qlock.  If a child needs a ref on its parent, it will need to kref_get during | 
|  | * rcu lock.  Children hold refs on parents, but a concurrent removal/rename | 
|  | * could break that link and decref the parent. | 
|  | * | 
|  | * Synchronization rules | 
|  | * ------------------ | 
|  | * Hold the lifetime spin lock when: | 
|  | * - Changing or relying on the value of flags (e.g., during kcref upping) | 
|  | * - Upping a kref from 0 during a lookup | 
|  | * - Changing membership on LRU lists (e.g. during release or kcref upping) | 
|  | *   The lock ordering is entry -> LRU list.  Note the LRU pruner will lock the | 
|  | *   list first, then *trylock* the entry *and* the parent, skipping on failure. | 
|  | * | 
|  | * Refcounting rules: (subject to a device's whims) | 
|  | * - All walked or attached chans (distinct; those that will be closed in the | 
|  | *   device) have a refcounted tree_file. | 
|  | * - The tree_files do not have to be in a lookup structure (parent's list / | 
|  | *   hash table); they could have been unlinked/removed and waiting for the | 
|  | *   final close. | 
|  | * - tree_files can have 0 references.  Typically, files that are unopened | 
|  | *   (technically, unwalked) will have their kref == 0.  On release, they can | 
|  | *   stay in the tree/lookup system, and they are added to an LRU list. | 
|  | * - negative tree_files (i.e. names for files that don't exist) always have a | 
|  | *   refcnt == 0, and are candidates for being pruned at all times. | 
|  | * | 
|  | * Lock ordering: | 
|  | * - Note the tree uses the qlock in the FS file.  In essence, they are the same | 
|  | *   object (a tree_file is an fs_file).  This qlock is for changing the | 
|  | *   contents of the file, whether that's the parent directories links to its | 
|  | *   children, a file's contents (via write), or the metadata (dir) of either. | 
|  | * - Parent qlock -> hash_table bucketlock | 
|  | * - Parent qlock -> child lifetime spinlock -> LRU list spinlock | 
|  | *   Note the LRU pruner inverts this ordering, and uses trylocks | 
|  | * - Parent qlock -> child qlock (unlink and create/rename). | 
|  | * - Qlocking multiple files that aren't parent->child requires the rename_mtx | 
|  | */ | 
|  | struct tree_file { | 
|  | struct fs_file			file; | 
|  | spinlock_t			lifetime; | 
|  | int				flags; | 
|  | struct kref			kref; | 
|  | struct rcu_head			rcu; | 
|  | struct tree_file		*parent;	/* rcu protected */ | 
|  | struct hlist_node		hash;		/* rcu protected */ | 
|  | struct list_head		siblings; | 
|  | struct list_head		children; | 
|  | struct list_head		lru; | 
|  | bool				can_have_children; | 
|  | struct tree_filesystem		*tfs; | 
|  | }; | 
|  |  | 
|  | #define TF_F_DISCONNECTED	(1 << 0) | 
|  | #define TF_F_NEGATIVE		(1 << 1) | 
|  | #define TF_F_ON_LRU		(1 << 2) | 
|  | #define TF_F_IS_ROOT		(1 << 3) | 
|  | #define TF_F_HAS_BEEN_USED	(1 << 4) | 
|  |  | 
|  | /* Devices can put their tree_files / fs_files whereever they want.  For now, | 
|  | * all of them will use aux.  We can make ops for this if we need it. */ | 
|  | static inline struct tree_file *chan_to_tree_file(struct chan *c) | 
|  | { | 
|  | return c->aux; | 
|  | } | 
|  |  | 
|  | static inline void chan_set_tree_file(struct chan *c, struct tree_file *tf) | 
|  | { | 
|  | c->aux = tf; | 
|  | } | 
|  |  | 
|  | static inline struct qid tree_file_to_qid(struct tree_file *tf) | 
|  | { | 
|  | return tf->file.dir.qid; | 
|  | } | 
|  |  | 
|  | static inline bool tree_file_is_dir(struct tree_file *tf) | 
|  | { | 
|  | return tree_file_to_qid(tf).type & QTDIR ? true : false; | 
|  | } | 
|  |  | 
|  | static inline bool tree_file_is_file(struct tree_file *tf) | 
|  | { | 
|  | return qid_is_file(tree_file_to_qid(tf)); | 
|  | } | 
|  |  | 
|  | static inline bool tree_file_is_symlink(struct tree_file *tf) | 
|  | { | 
|  | return tree_file_to_qid(tf).type & QTSYMLINK ? true : false; | 
|  | } | 
|  |  | 
|  | static inline bool tree_file_is_negative(struct tree_file *tf) | 
|  | { | 
|  | return tf->flags & TF_F_NEGATIVE ? true : false; | 
|  | } | 
|  |  | 
|  | static inline bool tree_file_is_root(struct tree_file *tf) | 
|  | { | 
|  | return tf == tf->tfs->root; | 
|  | } | 
|  |  | 
|  | static inline char *tree_file_to_name(struct tree_file *tf) | 
|  | { | 
|  | return tf->file.dir.name; | 
|  | } | 
|  |  | 
|  | static inline bool caller_has_tf_perms(struct tree_file *tf, int omode) | 
|  | { | 
|  | return caller_has_file_perms(&tf->file, omode); | 
|  | } | 
|  |  | 
|  | static inline void tree_file_perm_check(struct tree_file *tf, int omode) | 
|  | { | 
|  | fs_file_perm_check(&tf->file, omode); | 
|  | } | 
|  |  | 
|  | /* tree_file helpers */ | 
|  | bool tf_kref_get(struct tree_file *tf); | 
|  | void tf_kref_put(struct tree_file *tf); | 
|  | struct tree_file *tree_file_alloc(struct tree_filesystem *tfs, | 
|  | struct tree_file *parent, const char *name); | 
|  | struct walkqid *tree_file_walk(struct tree_file *from, char **name, | 
|  | unsigned int nname); | 
|  | struct tree_file *tree_file_create(struct tree_file *parent, const char *name, | 
|  | uint32_t perm, char *ext); | 
|  | void tree_file_remove(struct tree_file *child); | 
|  | void tree_file_rename(struct tree_file *tf, struct tree_file *new_parent, | 
|  | const char *name, int flags); | 
|  | ssize_t tree_file_readdir(struct tree_file *parent, void *ubuf, size_t n, | 
|  | off64_t offset, int *dri); | 
|  |  | 
|  | /* tree_file helpers that operate on chans */ | 
|  | struct walkqid *tree_chan_walk(struct chan *c, struct chan *nc, char **name, | 
|  | unsigned int nname); | 
|  | void tree_chan_create(struct chan *c, char *name, int omode, uint32_t perm, | 
|  | char *ext); | 
|  | struct chan *tree_chan_open(struct chan *c, int omode); | 
|  | void tree_chan_close(struct chan *c); | 
|  | void tree_chan_remove(struct chan *c); | 
|  | void tree_chan_rename(struct chan *c, struct chan *new_p_c, const char *name, | 
|  | int flags); | 
|  | size_t tree_chan_read(struct chan *c, void *ubuf, size_t n, off64_t offset); | 
|  | size_t tree_chan_write(struct chan *c, void *ubuf, size_t n, off64_t offset); | 
|  | size_t tree_chan_stat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); | 
|  | size_t tree_chan_wstat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz); | 
|  | struct fs_file *tree_chan_mmap(struct chan *c, struct vm_region *vmr, int prot, | 
|  | int flags); | 
|  | unsigned long tree_chan_ctl(struct chan *c, int op, unsigned long a1, | 
|  | unsigned long a2, unsigned long a3, | 
|  | unsigned long a4); | 
|  |  | 
|  | struct chan *tree_file_alloc_chan(struct tree_file *tf, struct dev *dev, | 
|  | char *name); | 
|  | void tfs_init(struct tree_filesystem *tfs); | 
|  | void tfs_destroy(struct tree_filesystem *tfs); | 
|  | void tfs_frontend_for_each(struct tree_filesystem *tfs, | 
|  | void (*cb)(struct tree_file *tf)); | 
|  | void tfs_frontend_purge(struct tree_filesystem *tfs, | 
|  | void (*cb)(struct tree_file *tf)); | 
|  | void __tfs_dump(struct tree_filesystem *tfs); | 
|  | void __tfs_dump_tf(struct tree_file *tf); | 
|  |  | 
|  | void tfs_lru_for_each(struct tree_filesystem *tfs, bool cb(struct tree_file *), | 
|  | size_t max_tfs); | 
|  | void tfs_lru_prune_neg(struct tree_filesystem *tfs); |