/* Copyright (c) 2018 Google Inc
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * #gtfs, generic tree file system frontend that hooks to a backend 9p device
 */

#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <smp.h>
#include <tree_file.h>

struct dev gtfs_devtab;

static char *devname(void)
{
	return gtfs_devtab.name;
}

struct gtfs {
	struct tree_filesystem		tfs;
	struct kref			users;
};

/* Blob hanging off the fs_file->priv.  The backend chans are only accessed
 * (changed or used) with the corresponding fs_file qlock held.  That's the
 * primary use of the qlock - we might be able to avoid qlocking with increfs
 * and atomics or spinlocks, but be careful of be_length.  Qlocking doesn't
 * matter much yet since #mnt serializes.
 *
 * The walk chan is never opened - it's basically just the walked fid, from
 * which we can do other walks or get the I/O chans.  The read and write chans
 * are opened on demand and closed periodically.  We open them initially on
 * open/create in case we are unable to open them (e.g. unwritable).  Better to
 * find out early than during a long writeback.
 *
 * The mnt server might complain about having too many open fids.  We can run a
 * ktask that periodically closes the be_chans on any LRU'd files.
 *
 * be_{length,mode,mtime} should be what the remote server thinks they are -
 * especially for length and mode.  The invariant is that e.g. the file's
 * length == be_length, and the qlock protects that invariant.  We don't care
 * as much about mtime, since some 9p servers just change that on their own.
 *
 * Also note that you can't trust be_length for directories.  You'll often get
 * 4096 or 0, depending on the 9p server you're talking to. */
struct gtfs_priv {
	struct chan			*be_walk;	/* never opened */
	struct chan			*be_read;
	struct chan			*be_write;
	uint64_t			be_length;
	uint32_t			be_mode;
	struct timespec			be_mtime;
	bool				was_removed;
};
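
/* A sketch of the fid-closing ktask callback mentioned above (hypothetical;
 * nothing registers this today, and close_be_chans_cb is a made-up name).
 * Everything it calls exists in this file or tree_file.h, and cclose()
 * tolerates NULL, so we don't need to check the I/O chans first:
 *
 *	static bool close_be_chans_cb(struct tree_file *tf)
 *	{
 *		struct fs_file *f = &tf->file;
 *		struct gtfs_priv *gp = fsf_to_gtfs_priv(f);
 *
 *		qlock(&f->qlock);
 *		cclose(gp->be_read);
 *		cclose(gp->be_write);
 *		gp->be_read = NULL;
 *		gp->be_write = NULL;
 *		qunlock(&f->qlock);
 *		return true;
 *	}
 *
 * A periodic ktask could pass that to tfs_lru_for_each(), the way
 * gtfs_free_memory() below does with lru_prune_cb(). */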

static inline struct gtfs_priv *fsf_to_gtfs_priv(struct fs_file *f)
{
	return f->priv;
}

static inline struct gtfs_priv *tf_to_gtfs_priv(struct tree_file *tf)
{
	return fsf_to_gtfs_priv(&tf->file);
}

/* Helper.  Clones the chan (walks to itself) and then opens with omode. */
static struct chan *cclone_and_open(struct chan *c, int omode)
{
	ERRSTACK(1);
	struct chan *new;

	new = cclone(c);
	if (waserror()) {
		cclose(new);
		nexterror();
	}
	new = devtab[new->type].open(new, omode);
	poperror();
	return new;
}

/* Send a wstat with the contents of dir for the file. */
static void wstat_dir(struct fs_file *f, struct dir *dir)
{
	ERRSTACK(1);
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);
	size_t sz;
	uint8_t *buf;

	sz = sizeD2M(dir);
	buf = kzmalloc(sz, MEM_WAIT);
	convD2M(dir, buf, sz);
	if (waserror()) {
		kfree(buf);
		nexterror();
	}
	devtab[gp->be_walk->type].wstat(gp->be_walk, buf, sz);
	kfree(buf);
	poperror();
}

/* Note we only track and thus change the following:
 * - length
 * - mode
 * - mtime (second granularity)
 * If we support chown, we'll have to do something else there.  See
 * fs_file_copy_from_dir(). */
static void sync_metadata(struct fs_file *f)
{
	ERRSTACK(1);
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);
	struct dir dir;
	bool send_it = false;

	qlock(&f->qlock);
	init_empty_dir(&dir);
	if (f->dir.length != gp->be_length) {
		dir.length = f->dir.length;
		send_it = true;
	}
	if (f->dir.mode != gp->be_mode) {
		dir.mode = f->dir.mode;
		send_it = true;
	}
	if (f->dir.mtime.tv_sec != gp->be_mtime.tv_sec) {
		/* ninep's UFS server assumes you set both atime and mtime */
		dir.atime.tv_sec = f->dir.atime.tv_sec;
		dir.atime.tv_nsec = f->dir.atime.tv_nsec;
		dir.mtime.tv_sec = f->dir.mtime.tv_sec;
		dir.mtime.tv_nsec = f->dir.mtime.tv_nsec;
		send_it = true;
	}
	if (!send_it) {
		qunlock(&f->qlock);
		return;
	}
	if (waserror()) {
		qunlock(&f->qlock);
		nexterror();
	}
	wstat_dir(f, &dir);
	/* We set these after the wstat succeeds.  If we set them earlier, we'd
	 * have to roll back.  Remember the invariant: the be_values match the
	 * backend's file's values.  We should be able to stat be_walk and
	 * check these (though the 9p server might muck with atime/mtime). */
	if (f->dir.length != gp->be_length)
		gp->be_length = f->dir.length;
	if (f->dir.mode != gp->be_mode)
		gp->be_mode = f->dir.mode;
	if (f->dir.mtime.tv_sec != gp->be_mtime.tv_sec)
		gp->be_mtime = f->dir.mtime;
	qunlock(&f->qlock);
	poperror();
}
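
/* A sketch of the invariant check alluded to above (hypothetical; not wired
 * up).  With the file's qlock held, a stat of be_walk should agree with our
 * cached be_* values:
 *
 *	struct dir *d = chandirstat(gp->be_walk);
 *
 *	if (d) {
 *		assert(d->length == gp->be_length);
 *		assert(d->mode == gp->be_mode);
 *		kfree(d);
 *	}
 *
 * We'd skip mtime, since some servers change it behind our back. */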

/* Can throw on error, currently from sync_metadata. */
static void writeback_file(struct fs_file *f)
{
	sync_metadata(f);
	/* This is a lockless peek.  Once a file is dirtied, we never undirty
	 * it.  To do so, we need the file qlock (not a big deal, though that
	 * may replace the PM qlock), and we still need to handle/scan mmaps.
	 * Specifically, we only dirty when an mmap attaches (PROT_WRITE and
	 * MAP_SHARED), but we don't know if an existing mapping has caused
	 * more dirtying (an mmap can re-dirty then detach before our next
	 * writeback).  That usually requires a scan.  This is all an
	 * optimization to avoid scanning the entire PM's pages for whether or
	 * not they are dirty.
	 *
	 * Also, our writeback pm op grabs the file's qlock.  So be careful,
	 * though we could use another qlock, since we're mostly protecting
	 * backend state. */
	if (qid_is_file(f->dir.qid) && (f->flags & FSF_DIRTY))
		pm_writeback_pages(f->pm);
}

static void purge_cb(struct tree_file *tf)
{
	ERRSTACK(1);

	/* Discard the error, and keep on going if we can. */
	if (!waserror())
		writeback_file(&tf->file);
	poperror();
}

static void __gtfs_destroy(uint32_t srcid, long a0, long a1, long a2)
{
	struct gtfs *gtfs = (struct gtfs*)a0;

	tfs_frontend_purge(&gtfs->tfs, purge_cb);
	/* this is the ref from attach */
	assert(kref_refcnt(&gtfs->tfs.root->kref) == 1);
	tf_kref_put(gtfs->tfs.root);
	/* ensures __tf_free() happens before tfs_destroy */
	rcu_barrier();
	tfs_destroy(&gtfs->tfs);
	kfree(gtfs);
}

static void gtfs_release(struct kref *kref)
{
	struct gtfs *gtfs = container_of(kref, struct gtfs, users);

	/* We can't use RCU within an RCU callback, and release methods are
	 * often called from within callbacks.  We can use a kernel message,
	 * which can block and do whatever else it wants.  In essence, we break
	 * the connection to our current context (the rcu_mgmt_ktask) by using
	 * a kmsg. */
	send_kernel_message(core_id(), __gtfs_destroy, (long)gtfs, 0, 0,
			    KMSG_ROUTINE);
}

static struct gtfs *chan_to_gtfs(struct chan *c)
{
	struct tree_file *tf = chan_to_tree_file(c);

	return (struct gtfs*)(tf->tfs);
}

static void incref_gtfs_chan(struct chan *c)
{
	kref_get(&chan_to_gtfs(c)->users, 1);
}

static void decref_gtfs_chan(struct chan *c)
{
	kref_put(&chan_to_gtfs(c)->users);
}

static struct walkqid *gtfs_walk(struct chan *c, struct chan *nc, char **name,
				 unsigned int nname)
{
	struct walkqid *wq;

	wq = tree_chan_walk(c, nc, name, nname);
	if (wq && wq->clone && (wq->clone != c))
		incref_gtfs_chan(wq->clone);
	return wq;
}

/* Given an omode, make sure the be chans are set up */
static void setup_be_chans(struct chan *c, int omode)
{
	ERRSTACK(1);
	struct tree_file *tf = chan_to_tree_file(c);
	struct fs_file *f = &tf->file;
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);

	qlock(&f->qlock);
	if (waserror()) {
		qunlock(&f->qlock);
		nexterror();
	}
	/* Readers and writers both need be_read.  With fs files you can't have
	 * a writable-only file, since we need to load the page into the page
	 * cache, which is a readpage. */
	if (!gp->be_read)
		gp->be_read = cclone_and_open(gp->be_walk, O_READ);
	if (!gp->be_write && (omode & O_WRITE))
		gp->be_write = cclone_and_open(gp->be_walk, O_WRITE);
	qunlock(&f->qlock);
	poperror();
}

static struct chan *gtfs_open(struct chan *c, int omode)
{
	/* Truncate can happen before we set up the be_chans.  If we need
	 * those, we can swap the order. */
	c = tree_chan_open(c, omode);
	setup_be_chans(c, omode);
	return c;
}

static void gtfs_create(struct chan *c, char *name, int omode, uint32_t perm,
			char *ext)
{
	tree_chan_create(c, name, omode, perm, ext);
	/* We have to setup *after* create, since it moves the chan from the
	 * parent to the new file. */
	setup_be_chans(c, omode);
}

static void gtfs_close(struct chan *c)
{
	tree_chan_close(c);
	decref_gtfs_chan(c);
}

static void gtfs_remove(struct chan *c)
{
	ERRSTACK(1);
	struct gtfs *gtfs = chan_to_gtfs(c);

	if (waserror()) {
		/* Same old pain-in-the-ass for remove */
		kref_put(&gtfs->users);
		nexterror();
	}
	tree_chan_remove(c);
	kref_put(&gtfs->users);
	poperror();
}

static size_t gtfs_wstat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz)
{
	size_t ret;

	ret = tree_chan_wstat(c, m_buf, m_buf_sz);
	/* Tell the backend so that any metadata changes take effect
	 * immediately.  Consider chmod +w.  We need to tell the 9p server so
	 * that it will allow future accesses. */
	sync_metadata(&chan_to_tree_file(c)->file);
	return ret;
}

/* Caller holds the file's qlock. */
static size_t __gtfs_fsf_read(struct fs_file *f, void *ubuf, size_t n,
			      off64_t off)
{
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);

	if (!gp->be_read)
		gp->be_read = cclone_and_open(gp->be_walk, O_READ);
	return devtab[gp->be_read->type].read(gp->be_read, ubuf, n, off);
}

/* Reads a file from its backend chan */
static size_t gtfs_fsf_read(struct fs_file *f, void *ubuf, size_t n,
			    off64_t off)
{
	ERRSTACK(1);
	size_t ret;

	qlock(&f->qlock);
	if (waserror()) {
		qunlock(&f->qlock);
		nexterror();
	}
	ret = __gtfs_fsf_read(f, ubuf, n, off);
	qunlock(&f->qlock);
	poperror();
	return ret;
}

/* Caller holds the file's qlock. */
static size_t __gtfs_fsf_write(struct fs_file *f, void *ubuf, size_t n,
			       off64_t off)
{
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);
	size_t ret;

	if (!gp->be_write)
		gp->be_write = cclone_and_open(gp->be_walk, O_WRITE);
	ret = devtab[gp->be_write->type].write(gp->be_write, ubuf, n, off);
	/* The backend's length grows to cover whatever we just wrote. */
	gp->be_length = MAX(gp->be_length, off + ret);
	return ret;
}

/* Writes a file to its backend chan */
static size_t gtfs_fsf_write(struct fs_file *f, void *ubuf, size_t n,
			     off64_t off)
{
	ERRSTACK(1);
	size_t ret;

	qlock(&f->qlock);
	if (waserror()) {
		qunlock(&f->qlock);
		nexterror();
	}
	ret = __gtfs_fsf_write(f, ubuf, n, off);
	qunlock(&f->qlock);
	poperror();
	return ret;
}

static size_t gtfs_read(struct chan *c, void *ubuf, size_t n, off64_t off)
{
	struct tree_file *tf = chan_to_tree_file(c);

	if (tree_file_is_dir(tf))
		return gtfs_fsf_read(&tf->file, ubuf, n, off);
	return fs_file_read(&tf->file, ubuf, n, off);
}

/* Given a file (with dir->name set), couple it and sync to the backend chan.
 * This will store/consume the ref for backend, in the TF (freed with
 * gtfs_tf_free), even on error, unless you zero out the be_walk field. */
static void gtfs_tf_couple_backend(struct tree_file *tf, struct chan *backend)
{
	struct dir *dir;
	struct gtfs_priv *gp = kzmalloc(sizeof(struct gtfs_priv), MEM_WAIT);

	tf->file.priv = gp;
	tf->file.dir.qid = backend->qid;
	gp->be_walk = backend;
	dir = chandirstat(backend);
	if (!dir)
		error(ENOMEM, "chandirstat failed");
	fs_file_copy_from_dir(&tf->file, dir);
	kfree(dir);
	/* For sync_metadata */
	gp->be_length = tf->file.dir.length;
	gp->be_mode = tf->file.dir.mode;
	gp->be_mtime = tf->file.dir.mtime;
}

static void gtfs_tf_free(struct tree_file *tf)
{
	struct gtfs_priv *gp = tf_to_gtfs_priv(tf);

	/* Might have some partially / never constructed tree files */
	if (!gp)
		return;
	if (gp->was_removed) {
		gp->be_walk->type = -1;
		/* sanity */
		assert(kref_refcnt(&gp->be_walk->ref) == 1);
	}
	cclose(gp->be_walk);
	/* I/O chans can be NULL */
	cclose(gp->be_read);
	cclose(gp->be_write);
	kfree(gp);
}

static void gtfs_tf_unlink(struct tree_file *parent, struct tree_file *child)
{
	struct gtfs_priv *gp = tf_to_gtfs_priv(child);
	struct chan *be_walk = gp->be_walk;

	/* Remove clunks the be_walk chan/fid.  If it succeeded (and I think
	 * even if it didn't), we shouldn't close that fid again, which is what
	 * will happen soon after this function.  The TF code calls unlink,
	 * then when the last ref closes the TF, it'll get freed and we'll call
	 * back to gtfs_tf_free().
	 *
	 * This is the same issue we run into with all of the device remove ops
	 * where we want to refcnt something hanging off e.g. c->aux.  In 9p,
	 * you're not supposed to close a chan/fid that was already removed.
	 *
	 * Now here's the weird thing.  We can close the be_walk chan after
	 * remove, but it's possible that someone has walked and perhaps opened
	 * a frontend chan + TF, but hasn't done a read yet.  So someone might
	 * want to set up be_read, but they can't due to be_walk being closed.
	 * We could give them a 'phase error' (one of 9p's errors for I/O on a
	 * removed file).
	 *
	 * Alternatively, we can mark the gtfs_priv so that when we do free it,
	 * we skip the dev.remove, similar to what sysremove() does.  That's
	 * probably easier.  This is technically racy, but we know that the
	 * release/free method won't be called until we return. */
	gp->was_removed = true;
	devtab[be_walk->type].remove(be_walk);
}

/* Caller sets the name, but doesn't know if it exists or not.  It's our job to
 * find out if it exists and fill in the child structure appropriately.  For
 * negative entries, just flagging it is fine.  Otherwise, we fill in the dir.
 * We should throw on error. */
static void gtfs_tf_lookup(struct tree_file *parent, struct tree_file *child)
{
	struct walkqid *wq;
	struct chan *be_walk = tf_to_gtfs_priv(parent)->be_walk;
	struct chan *child_be_walk;

	wq = devtab[be_walk->type].walk(be_walk, NULL, &child->file.dir.name,
					1);
	if (!wq || !wq->clone) {
		kfree(wq);
		/* This isn't racy, since the child isn't linked to the tree
		 * yet */
		child->flags |= TF_F_NEGATIVE | TF_F_HAS_BEEN_USED;
		return;
	}
	/* walk shouldn't give us the same chan struct since we gave it a name
	 * and a NULL nc. */
	assert(wq->clone != be_walk);
	/* only gave it one name, and it didn't fail. */
	assert(wq->nqid == 1);
	/* sanity */
	assert(wq->clone->qid.path == wq->qid[wq->nqid - 1].path);
	child_be_walk = wq->clone;
	kfree(wq);
	gtfs_tf_couple_backend(child, child_be_walk);
}

static void gtfs_tf_create(struct tree_file *parent, struct tree_file *child,
			   int perm)
{
	ERRSTACK(1);
	struct chan *c = cclone(tf_to_gtfs_priv(parent)->be_walk);

	if (waserror()) {
		cclose(c);
		nexterror();
	}
	devtab[c->type].create(c, tree_file_to_name(child), 0, perm,
			       child->file.dir.ext);
	/* The chan c is opened, which we don't want.  We can't cclone it
	 * either (since it is opened).  All we can do is have the parent walk
	 * again so we can get the child's unopened be_walk chan.
	 * Conveniently, that's basically a lookup, so create is really two
	 * things: make it, then look it up from the backend. */
	cclose(c);
	poperror();
	if (waserror()) {
		warn("File %s was created in the backend, but unable to look it up!",
		     tree_file_to_name(child));
		nexterror();
	}
	gtfs_tf_lookup(parent, child);
	poperror();
}

static void gtfs_wstat_rename(struct fs_file *f, const char *name)
{
	struct dir dir;

	init_empty_dir(&dir);
	dir.name = (char*)name;
	wstat_dir(f, &dir);
}

static void gtfs_tf_rename(struct tree_file *tf, struct tree_file *old_parent,
			   struct tree_file *new_parent, const char *name,
			   int flags)
{
	struct chan *tf_c = tf_to_gtfs_priv(tf)->be_walk;
	struct chan *np_c = tf_to_gtfs_priv(new_parent)->be_walk;

	if (!devtab[tf_c->type].rename) {
		/* 9p can handle intra-directory renames, though some Akaros
		 * #devices might throw. */
		if (old_parent == new_parent) {
			gtfs_wstat_rename(&tf->file, name);
			return;
		}
		error(EXDEV, "%s: %s doesn't support rename", devname(),
		      devtab[tf_c->type].name);
	}
	devtab[tf_c->type].rename(tf_c, np_c, name, flags);
}

static bool gtfs_tf_has_children(struct tree_file *parent)
{
	struct dir dir[1];

	assert(tree_file_is_dir(parent));	/* TF bug */
	/* Any read should work, but there might be issues asking for something
	 * smaller than a dir.
	 *
	 * Note we use the unlocked read here.  The fs_file's qlock is held by
	 * our caller, and we reuse that qlock for the sync for
	 * reading/writing. */
	return __gtfs_fsf_read(&parent->file, dir, sizeof(struct dir), 0) > 0;
}

struct tree_file_ops gtfs_tf_ops = {
	.free = gtfs_tf_free,
	.unlink = gtfs_tf_unlink,
	.lookup = gtfs_tf_lookup,
	.create = gtfs_tf_create,
	.rename = gtfs_tf_rename,
	.has_children = gtfs_tf_has_children,
};

/* Fills page with its contents from its backing store file.
 *
 * Note the page/offset might be beyond the current file length, based on the
 * current pagemap code. */
static int gtfs_pm_readpage(struct page_map *pm, struct page *pg)
{
	ERRSTACK(1);
	void *kva = page2kva(pg);
	off64_t offset = pg->pg_index << PGSHIFT;
	size_t ret;

	if (waserror()) {
		poperror();
		return -get_errno();
	}
	/* If offset is beyond the length of the file, the 9p device/server
	 * should return 0.  We'll just init an empty page.  The length on the
	 * frontend (in the fsf->dir.length) will be adjusted.  The backend
	 * will hear about it on the next sync. */
	ret = gtfs_fsf_read(pm->pm_file, kva, PGSIZE, offset);
	poperror();
	if (ret < PGSIZE)
		memset(kva + ret, 0, PGSIZE - ret);
	atomic_or(&pg->pg_flags, PG_UPTODATE);
	return 0;
}

/* Meant to take the page from PM and flush to backing store. */
static int gtfs_pm_writepage(struct page_map *pm, struct page *pg)
{
	ERRSTACK(1);
	struct fs_file *f = pm->pm_file;
	void *kva = page2kva(pg);
	off64_t offset = pg->pg_index << PGSHIFT;
	size_t amt;

	qlock(&f->qlock);
	if (waserror()) {
		qunlock(&f->qlock);
		poperror();
		return -get_errno();
	}
	/* Don't writeback beyond the length of the file.  Most of the time
	 * this comes up when the len is in the middle of the last page. */
	if (offset >= fs_file_get_length(f)) {
		qunlock(&f->qlock);
		poperror();
		return 0;
	}
	amt = MIN(PGSIZE, fs_file_get_length(f) - offset);
	__gtfs_fsf_write(f, kva, amt, offset);
	qunlock(&f->qlock);
	poperror();
	return 0;
}

/* Caller holds the file's qlock */
static void __trunc_to(struct fs_file *f, off64_t begin)
{
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);
	struct dir dir;

	init_empty_dir(&dir);
	dir.length = begin;
	wstat_dir(f, &dir);
	/* recall the invariant: be_length == the backend's length */
	gp->be_length = begin;
}

/* Caller holds the file's qlock */
static void __zero_fill(struct fs_file *f, off64_t begin, off64_t end)
{
	ERRSTACK(1);
	void *zeros;

	if (PGOFF(begin) || PGOFF(end))
		error(EINVAL, "zero_fill had unaligned begin (%p) or end (%p)",
		      begin, end);
	zeros = kpages_zalloc(PGSIZE, MEM_WAIT);
	if (waserror()) {
		kpages_free(zeros, PGSIZE);
		nexterror();
	}
	for (off64_t o = begin; o < end; o += PGSIZE)
		__gtfs_fsf_write(f, zeros, PGSIZE, o);
	kpages_free(zeros, PGSIZE);
	poperror();
}

/* The intent here is for the backend to drop all data in the range.  Zeros are
 * OK - any future read should get a zero.
 *
 * These offsets are the beginning and end of the hole to punch.  The TF code
 * already dealt with edge cases, so these happen to be page aligned.  That
 * shouldn't matter for the backend device.
 *
 * Don't worry about a border page for end that is all zeros.
 * fs_file_truncate() rounded up to the nearest page to avoid issues.  The user
 * could manually punch a hole, and they could create a page of zeros at end.
 * We don't care.
 *
 * 9p doesn't have a hole-punch, so we'll truncate if we can and o/w fill with
 * zeros.
 *
 * Note that the frontend's file length often differs from the backend's.
 * Under normal operation, such as writing to a file, the frontend's len will
 * be greater than the backend's.  When we sync, the backend learns the real
 * length.  Similarly, when we shrink a file, the backend's length may be
 * greater than the frontend's.  Consider a truncate from 8192 to 4095: we
 * punch with begin = 4096, end = 8192.  In either case, the backend learns
 * the real length on a sync.  In punch_hole, we're just trying to discard old
 * data. */
static void gtfs_fs_punch_hole(struct fs_file *f, off64_t begin, off64_t end)
{
	ERRSTACK(1);
	struct gtfs_priv *gp = fsf_to_gtfs_priv(f);

	qlock(&f->qlock);
	if (waserror()) {
		qunlock(&f->qlock);
		nexterror();
	}
	if (end >= gp->be_length) {
		if (begin < gp->be_length)
			__trunc_to(f, begin);
	} else {
		__zero_fill(f, begin, end);
	}
	qunlock(&f->qlock);
	poperror();
}

static bool gtfs_fs_can_grow_to(struct fs_file *f, size_t len)
{
	/* TODO: are there any limits in 9p? */
	return true;
}

struct fs_file_ops gtfs_fs_ops = {
	.readpage = gtfs_pm_readpage,
	.writepage = gtfs_pm_writepage,
	.punch_hole = gtfs_fs_punch_hole,
	.can_grow_to = gtfs_fs_can_grow_to,
};

/* We're passed a backend chan, usually of type #mnt, used for an uncached
 * mount.  We call it 'backend.'  It is the result of an attach, e.g.
 * mntattach.  In the case of #mnt, this chan is different than the one that
 * has the 9p server on the other side, called 'mchan'.  That chan is at
 * backend->mchan, and also the struct mnt->c.  The struct mnt is shared by
 * all mounts talking to the 9p server over the mchan, and is stored at
 * mchan->mux.  Backend chans have a strong (counted) ref on the mchan.
 *
 * We create and return a chan of #gtfs, suitable for attaching to the
 * namespace.  This chan will have the root TF hanging off aux, just like how
 * any other attached TFS has a root TF.  #gtfs manages the linkage between a
 * TF and the backend, which is the purpose of gtfs_priv.
 *
 * A note on refcounts: in the normal, uncached operation, the 'backend' chan
 * has a ref (actually a chan kref, which you cclose) on the comms chan
 * (mchan).  We get one ref at mntattach time, and every distinct mntwalk gets
 * another ref.  Those actually get closed in chanfree(), since they are
 * stored at mchan.
 *
 * All gtfs *tree_files* have at least one refcounted chan corresponding to
 * the file/FID on the backend server.  Think of it as a 1:1 connection, even
 * though there is more than one chan.  The gtfs device can have many chans
 * pointing to the same TF, which is kreffed.  That TF is 1:1 on a backend
 * object.
 *
 * All walks from this attach point will get chans with TFs from this TFS and
 * will incref the struct gtfs. */
static struct chan *gtfs_attach(char *arg)
{
	ERRSTACK(2);
	struct chan *backend = (struct chan*)arg;
	struct chan *frontend;
	struct tree_filesystem *tfs;
	struct gtfs *gtfs;

	frontend = devattach(devname(), 0);
	if (waserror()) {
		/* same as #mnt - don't cclose, since we don't want to devtab
		 * close, and we know the ref == 1 here. */
		chanfree(frontend);
		nexterror();
	}
	gtfs = kzmalloc(sizeof(struct gtfs), MEM_WAIT);
	/* This 'users' kref is the one that every distinct frontend chan has.
	 * These come from attaches and successful, 'moving' walks. */
	kref_init(&gtfs->users, gtfs_release, 1);
	tfs = (struct tree_filesystem*)gtfs;
	/* This gives us one ref on root, released during gtfs_release().  name
	 * is set to ".", though that gets overwritten during coupling. */
	tfs_init(tfs);
	if (waserror()) {
		/* don't consume the backend ref on error, caller expects to
		 * have it */
		tf_to_gtfs_priv(tfs->root)->be_walk = NULL;
		/* ref from tfs_init.  this should free the TF. */
		tf_kref_put(tfs->root);
		tfs_destroy(tfs);
		kfree(gtfs);
		nexterror();
	}
	/* stores the ref for 'backend' inside tfs->root */
	gtfs_tf_couple_backend(tfs->root, backend);
	poperror();
	tfs->tf_ops = gtfs_tf_ops;
	tfs->fs_ops = gtfs_fs_ops;
	/* need another ref on root for the frontend chan */
	tf_kref_get(tfs->root);
	chan_set_tree_file(frontend, tfs->root);
	poperror();
	return frontend;
}
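
/* Usage sketch (hypothetical; actual call sites vary): a caller holding an
 * uncached #mnt attach can layer a gtfs on top of it by passing that chan as
 * 'arg'.  'backend' below stands in for whatever mntattach returned:
 *
 *	struct chan *backend = ...;	// uncached #mnt attach
 *	struct chan *front;
 *
 *	front = gtfs_devtab.attach((char*)backend);
 *
 * On success, the backend ref lives on in the root TF's gtfs_priv, and
 * 'front' can be bound into the namespace.  On error, per the waserror block
 * above, the caller still owns the backend ref. */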

static bool lru_prune_cb(struct tree_file *tf)
{
	ERRSTACK(1);

	if (waserror()) {
		/* not much to do - ssh the file out? */
		printk("Failed to sync file %s: %s\n", tree_file_to_name(tf),
		       current_errstr());
		poperror();
		return false;
	}
	writeback_file(&tf->file);
	poperror();
	return true;
}

static void pressure_dfs_cb(struct tree_file *tf)
{
	if (!tree_file_is_dir(tf))
		pm_free_unused_pages(tf->file.pm);
}

/* Under memory pressure, there are a bunch of things we can do. */
static void gtfs_free_memory(struct gtfs *gtfs)
{
	/* This attempts to remove every file from the LRU.  It'll write back
	 * dirty files, then if they haven't been used since we started, it'll
	 * delete the frontend TF, which will delete the entire page cache
	 * entry.  The heavy lifting is done by TF code. */
	tfs_lru_for_each(&gtfs->tfs, lru_prune_cb, -1);
	/* This drops the negative TFs.  It's not a huge deal, since they are
	 * small, but perhaps it'll help. */
	tfs_lru_prune_neg(&gtfs->tfs);
	/* This will attempt to free memory from all files in the frontend,
	 * regardless of whether or not they are in use.  This might help if
	 * you have some large files that happened to be open. */
	tfs_frontend_for_each(&gtfs->tfs, pressure_dfs_cb);
}

static void gtfs_sync_tf(struct tree_file *tf)
{
	writeback_file(&tf->file);
}

static void gtfs_sync_gtfs(struct gtfs *gtfs)
{
	tfs_frontend_for_each(&gtfs->tfs, gtfs_sync_tf);
}

/* chan_ctl or something can hook into these functions */
static void gtfs_sync_chan(struct chan *c)
{
	gtfs_sync_tf(chan_to_tree_file(c));
}

static void gtfs_sync_chans_fs(struct chan *any_c)
{
	gtfs_sync_gtfs(chan_to_gtfs(any_c));
}

static unsigned long gtfs_chan_ctl(struct chan *c, int op, unsigned long a1,
				   unsigned long a2, unsigned long a3,
				   unsigned long a4)
{
	switch (op) {
	case CCTL_SYNC:
		if (tree_file_is_dir(chan_to_tree_file(c)))
			gtfs_sync_chans_fs(c);
		else
			gtfs_sync_chan(c);
		return 0;
	default:
		error(EINVAL, "%s does not support %d", __func__, op);
	}
}

struct dev gtfs_devtab __devtab = {
	.name = "gtfs",

	.reset = devreset,
	.init = devinit,
	.shutdown = devshutdown,
	.attach = gtfs_attach,
	.walk = gtfs_walk,
	.stat = tree_chan_stat,
	.open = gtfs_open,
	.create = gtfs_create,
	.close = gtfs_close,
	.read = gtfs_read,
	.bread = devbread,
	.write = tree_chan_write,
	.bwrite = devbwrite,
	.remove = gtfs_remove,
	.rename = tree_chan_rename,
	.wstat = gtfs_wstat,
	.power = devpower,
	.chaninfo = devchaninfo,
	.mmap = tree_chan_mmap,
	.chan_ctl = gtfs_chan_ctl,
};