blob: 5c7476fa819a0d8704fc10e5949116c66315be19 [file] [log] [blame]
/* Copyright (c) 2010 The Regents of the University of California
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* Ext2, VFS required functions, internal functions, life, the universe, and
* everything! */
#include <vfs.h>
#include <ext2fs.h>
#include <blockdev.h>
#include <kmalloc.h>
#include <assert.h>
#include <kref.h>
#include <endian.h>
#include <error.h>
#include <pmap.h>
#include <bitmask.h>
/* These structs are declared again and initialized farther down */
struct page_map_operations ext2_pm_op;
struct super_operations ext2_s_op;
struct inode_operations ext2_i_op;
struct dentry_operations ext2_d_op;
struct file_operations ext2_f_op_file;
struct file_operations ext2_f_op_dir;
struct file_operations ext2_f_op_sym;
/* EXT2 Internal Functions */
/* Useful helper functions. */
/* Returns the block group ID of the BG containing the inode. BGs start with 0,
* inodes are indexed starting at 1. */
static struct ext2_block_group *ext2_inode2bg(struct inode *inode)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
unsigned int bg_num = (inode->i_ino - 1) /
le32_to_cpu(e2sbi->e2sb->s_inodes_per_group);
return &e2sbi->e2bg[bg_num];
}
/* This returns the inode's 0-index within a block group */
static unsigned int ext2_inode2bgidx(struct inode *inode)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
return (inode->i_ino - 1) % le32_to_cpu(e2sbi->e2sb->s_inodes_per_group);
}
/* Returns the inode number given a 0-index of an inode within a block group */
static unsigned long ext2_bgidx2ino(struct super_block *sb,
struct ext2_block_group *bg,
unsigned int ino_idx)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
struct ext2_sb *e2sb = e2sbi->e2sb;
struct ext2_block_group *e2bg = e2sbi->e2bg;
return (bg - e2bg) * le32_to_cpu(e2sb->s_inodes_per_group) + ino_idx + 1;
}
/* Returns an uncounted reference to the BG in the BG table, which is pinned,
* hanging off the sb. Note, the BGs cover the blocks starting from the first
* data block, not from 0. So if the FDB is 1, BG 0 covers 1 through 1024, and
* not 0 through 1023. */
static struct ext2_block_group *ext2_block2bg(struct super_block *sb,
uint32_t blk_num)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
unsigned int bg_num;
bg_num = (blk_num - le32_to_cpu(e2sbi->e2sb->s_first_data_block)) /
le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
return &e2sbi->e2bg[bg_num];
}
/* This returns the block's 0-index within a block group. Note all blocks are
* offset by FDB when dealing with BG membership. */
static unsigned int ext2_block2bgidx(struct super_block *sb, uint32_t blk_num)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
return (blk_num - le32_to_cpu(e2sbi->e2sb->s_first_data_block)) %
le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
}
/* Returns the FS block for the given BG's idx block */
static uint32_t ext2_bgidx2block(struct super_block *sb,
struct ext2_block_group *bg,
unsigned int blk_idx)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
struct ext2_sb *e2sb = e2sbi->e2sb;
struct ext2_block_group *e2bg = e2sbi->e2bg;
return (bg - e2bg) * le32_to_cpu(e2sb->s_blocks_per_group) + blk_idx +
le32_to_cpu(e2sb->s_first_data_block);
}
/* Slabs for ext2 specific info chunks */
struct kmem_cache *ext2_i_kcache;
/* One-time init for all ext2 instances */
void ext2_init(void)
{
ext2_i_kcache = kmem_cache_create("ext2_i_info", sizeof(struct ext2_i_info),
__alignof__(struct ext2_i_info), 0, 0, 0);
}
/* Block management */
/* TODO: pull these metablock functions out of ext2 */
/* Makes sure the FS block of metadata is in memory. This returns a pointer to
* the beginning of the requested block. Release it with put_metablock().
* Internally, the kreffing is done on the page. */
void *__ext2_get_metablock(struct block_device *bdev, unsigned long blk_num,
unsigned int blk_sz)
{
return bdev_get_buffer(bdev, blk_num, blk_sz)->bh_buffer;
}
/* Convenience wrapper */
void *ext2_get_metablock(struct super_block *sb, unsigned long block_num)
{
return __ext2_get_metablock(sb->s_bdev, block_num, sb->s_blocksize);
}
/* Helper to figure out the BH for any address within it's buffer */
static struct buffer_head *ext2_my_bh(struct super_block *sb, void *addr)
{
struct page *page = kva2page(addr);
struct buffer_head *bh = (struct buffer_head*)page->pg_private;
/* This case is for when we try do decref a non-BH'd 'metablock'. It's tied
* to e2ii->i_block[]. */
if (!bh)
return 0;
void *my_buf = (void*)ROUNDDOWN((uintptr_t)addr, sb->s_blocksize);
while (bh) {
if (bh->bh_buffer == my_buf)
break;
bh = bh->bh_next;
}
assert(bh && bh->bh_buffer == my_buf);
return bh;
}
/* Decrefs the buffer from get_metablock(). Call this when you no longer
* reference your metadata block/buffer. Yes, we could just decref the page,
* but this will work if we end up changing how bdev_put_buffer() works. */
void ext2_put_metablock(struct super_block *sb, void *buffer)
{
struct buffer_head *bh = ext2_my_bh(sb, buffer);
if (bh)
bdev_put_buffer(bh);
}
/* Will dirty the block/BH/page for the given metadata block/buffer. */
void ext2_dirty_metablock(struct super_block *sb, void *buffer)
{
struct buffer_head *bh = ext2_my_bh(sb, buffer);
if (bh)
bdev_dirty_buffer(bh);
}
/* Helper for alloc_block. It will try to alloc a block from the BG, starting
* with blk_idx (relative number within the BG). If successful, it will return
* the FS block number via *block_num. TODO: concurrency protection */
static bool ext2_tryalloc(struct super_block *sb, struct ext2_block_group *bg,
unsigned int blk_idx, uint32_t *block_num)
{
uint8_t *blk_bitmap;
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
unsigned int blks_per_bg = le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
bool found = FALSE;
/* Check to see if there are any free blocks */
if (!le32_to_cpu(bg->bg_free_blocks_cnt))
return FALSE;
/* Check the bitmap for your desired block. We'll loop through the whole
* BG, starting with the one we want first. */
blk_bitmap = ext2_get_metablock(sb, bg->bg_block_bitmap);
for (int i = 0; i < blks_per_bg; i++) {
if (!(GET_BITMASK_BIT(blk_bitmap, blk_idx))) {
SET_BITMASK_BIT(blk_bitmap, blk_idx);
bg->bg_free_blocks_cnt--;
ext2_dirty_metablock(sb, blk_bitmap);
found = TRUE;
break;
}
/* Note: the wrap-around hasn't been tested yet */
blk_idx = (blk_idx + 1) % blks_per_bg;
}
ext2_put_metablock(sb, blk_bitmap);
if (found)
*block_num = ext2_bgidx2block(sb, bg, blk_idx);
return found;
}
/* This allocates a fresh block for the inode, preferably 'fetish' (name
* courtesy of L.F.), returning the FS block number that's been allocated.
* Note, Linux does some block preallocation here. Consider doing the same (off
* the in-memory inode). Note the lack of concurrency protections here. */
uint32_t ext2_alloc_block(struct inode *inode, uint32_t fetish)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
struct ext2_block_group *fetish_bg, *bg_i = e2sbi->e2bg;
unsigned int blk_idx;
bool found = FALSE;
uint32_t retval = 0;
/* Get our ideal starting point */
fetish_bg = ext2_block2bg(inode->i_sb, fetish);
blk_idx = ext2_block2bgidx(inode->i_sb, fetish);
/* Try to find a free block in the BG of the one we desire */
found = ext2_tryalloc(inode->i_sb, fetish_bg, blk_idx, &retval);
if (found)
return retval;
warn("This part hasn't been tested yet.");
/* Find a block anywhere else (perhaps using the log trick, but for now just
* linearly scanning). */
for (int i = 0; i < e2sbi->nr_bgs; i++, bg_i++) {
if (bg_i == fetish_bg)
continue;
found = ext2_tryalloc(inode->i_sb, bg_i, 0, &retval);
if (found)
break;
}
if (!found)
panic("Ran out of blocks! (probably a bug)");
return retval;
}
/* Inode Management */
/* Helper for alloc_diskinode. It will try to alloc a disk inode from the BG.
* If successful, it will return the inode number in *ino_num. TODO:
* concurrency protection */
static bool ext2_tryalloc_diskinode(struct super_block *sb,
struct ext2_block_group *bg,
unsigned long *ino_num)
{
uint8_t *ino_bitmap;
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
unsigned int i, ino_per_bg = le32_to_cpu(e2sbi->e2sb->s_inodes_per_group);
bool found = FALSE;
/* Check to see if there are any free inodes */
if (!le32_to_cpu(bg->bg_free_inodes_cnt))
return FALSE;
/* Check the bitmap for the free inode */
ino_bitmap = ext2_get_metablock(sb, bg->bg_inode_bitmap);
for (i = 0; i < ino_per_bg; i++) {
if (!(GET_BITMASK_BIT(ino_bitmap, i))) {
SET_BITMASK_BIT(ino_bitmap, i);
bg->bg_free_inodes_cnt--;
ext2_dirty_metablock(sb, ino_bitmap);
found = TRUE;
break;
}
}
ext2_put_metablock(sb, ino_bitmap);
/* Convert the i (a 0-index bit) within the BG to a real inode number. */
if (found)
*ino_num = ext2_bgidx2ino(sb, bg, i);
return found;
}
/* This allocates a fresh ino number for inode, given the parent's BG. Make
* sure you set the inode's type before calling this, since it matters if we a
* making a directory or not. This disk inode is reserved on disk in the bitmap
* (at least the bitmap is changed and dirtied). Note the lack of concurrency
* protections here. Consider returning the BG too. */
unsigned long ext2_alloc_diskinode(struct inode *inode,
struct ext2_block_group *dir_bg)
{
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
struct ext2_block_group *bg = dir_bg;
struct ext2_block_group *bg_i = e2sbi->e2bg;
bool found = FALSE;
unsigned long retval = 0;
if (S_ISDIR(inode->i_mode)) {
/* TODO: intelligently pick a different bg to use than the current one.
* Right now, we just jump to the next one, though you should do things
* like take into account the ratio of directories to files. */
bg += 1;
}
/* Try to find a free inode in the chosen BG */
found = ext2_tryalloc_diskinode(inode->i_sb, bg, &retval);
if (found)
return retval;
warn("This part hasn't been tested yet.");
/* Find an inode anywhere else (perhaps using the log trick, but for now just
* linearly scanning). */
for (int i = 0; i < e2sbi->nr_bgs; i++, bg_i++) {
if (bg_i == bg)
continue;
found = ext2_tryalloc_diskinode(inode->i_sb, bg_i, &retval);
if (found)
break;
}
if (!found)
panic("Ran out of inodes! (probably a bug)");
return retval;
}
/* Helper for ino table management. blkid is the inode table block we are
* looking in, rel_blkid is the block we want, relative to the current
* threshhold for a level of indirection, and reach is how many items a given
* slot indexes. Returns a pointer to the slot for the given block. */
static uint32_t *ext2_find_inotable_slot(struct inode *inode, uint32_t blkid,
uint32_t rel_blkid,
unsigned int reach)
{
uint32_t *blk_buf = ext2_get_metablock(inode->i_sb, blkid);
assert(blk_buf);
return &blk_buf[rel_blkid / reach];
}
/* If blk_slot is empty (no block mapped there) it will alloc and link a new
* block. This is only used for allocating a block to be an indirect table
* (it's grabbing a metablock, we have no hint, and it handles the buffer
* differently than for a file page/buffer). */
static void ext2_fill_inotable_slot(struct inode *inode, uint32_t *blk_slot)
{
uint32_t new_blkid, hint_blk;
void *new_blk;
if (le32_to_cpu(*blk_slot))
return;
/* Use any block in our inode's BG as a hint for the indirect block */
hint_blk = ext2_bgidx2block(inode->i_sb, ext2_inode2bg(inode), 0);
new_blkid = ext2_alloc_block(inode, hint_blk);
/* Actually read in the block we alloc'd */
new_blk = ext2_get_metablock(inode->i_sb, new_blkid);
memset(new_blk, 0, inode->i_sb->s_blocksize);
ext2_dirty_metablock(inode->i_sb, new_blk);
/* We put it, despite it getting relooked up in the next walk */
ext2_put_metablock(inode->i_sb, new_blk);
/* Now write the new block into its slot */
*blk_slot = cpu_to_le32(new_blkid);
ext2_dirty_metablock(inode->i_sb, blk_slot);
}
/* This walks a table stored at block 'blkid', returning which block you should
* walk next in 'blkid'. rel_inoblk is where you are given the current level of
* indirection tables, and returns where you should be for the next one. Reach
* is how many items the current table's *items* can index (so if we're on a
* 3x indir block, reach should be for the doubly-indirect entries, and
* rel_inoblk will tell you where within that double block you want).
*
* This will also alloc intermediate tables if there isn't one already (TODO:
* concurrency protection on modifying the table). */
static void ext2_walk_inotable(struct inode *inode, uint32_t *blkid,
uint32_t *rel_inoblk, unsigned int reach)
{
uint32_t *blk_slot;
blk_slot = ext2_find_inotable_slot(inode, *blkid, *rel_inoblk, reach);
/* We could only do this based on a bool, but if we're trying to walk it,
* we ought to want to alloc if there is no block. */
ext2_fill_inotable_slot(inode, blk_slot);
*blkid = le32_to_cpu(*blk_slot);
*rel_inoblk = *rel_inoblk % reach;
ext2_put_metablock(inode->i_sb, blk_slot); /* ref for the one looked in */
}
/* Finds the slot of the FS block corresponding to a specific block number of an
* inode. It does this by walking the inode's tables. The general idea is that
* if the ino_block num is above a threshold, we'll need to go into indirect
* tables (1x, 2x, or 3x (triply indirect) tables). Block numbers start at 0.
*
* This returns a pointer within a metablock, which needs to be decref'd (and
* possibly dirtied) when you are done. Note, it can return a pointer to
* something that is NOT in a metablock (e2ii->i_block[]), but put_metablock can
* handle it for now.
*
* Horrendously untested, btw. */
uint32_t *ext2_lookup_inotable_slot(struct inode *inode, uint32_t ino_block)
{
struct ext2_i_info *e2ii = (struct ext2_i_info*)inode->i_fs_info;
uint32_t blkid, *blk_slot;
/* The 'reach' is how many blocks a given table can 'address' */
int ptrs_per_blk = inode->i_sb->s_blocksize / sizeof(uint32_t);
int reach_1xblk = ptrs_per_blk;
int reach_2xblk = ptrs_per_blk * ptrs_per_blk;
/* thresholds are the first blocks that require a level of indirection */
int single_threshold = 12;
int double_threshold = single_threshold + reach_1xblk;
int triple_threshold = double_threshold + reach_2xblk;
/* this is the desired block num lookup within a level of indirection. It
* will need to be offset based on what level of lookups we want (try it in
* your head with 12 first). */
uint32_t rel_inoblk;
if (ino_block >= triple_threshold) {
/* ino_block requires a triply-indirect lookup */
rel_inoblk = ino_block - triple_threshold;
/* Make sure a 14 block (3x indirect) is there */
ext2_fill_inotable_slot(inode, &e2ii->i_block[14]);
blkid = e2ii->i_block[14];
ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_2xblk);
ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_1xblk);
blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
} else if (ino_block >= double_threshold) {
/* ino_block requires a doubly-indirect lookup */
rel_inoblk = ino_block - double_threshold;
ext2_fill_inotable_slot(inode, &e2ii->i_block[13]);
blkid = e2ii->i_block[13];
ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_1xblk);
blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
} else if (ino_block >= single_threshold) {
/* ino_block requires a singly-indirect lookup */
rel_inoblk = ino_block - single_threshold;
ext2_fill_inotable_slot(inode, &e2ii->i_block[12]);
blkid = e2ii->i_block[12];
blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
} else {
/* Direct block, straight out of the inode */
blk_slot = &e2ii->i_block[ino_block];
}
return blk_slot;
}
/* Determines the FS block id for a given inode block id. Convenience wrapper
* that may go away soon. */
uint32_t ext2_find_inoblock(struct inode *inode, unsigned int ino_block)
{
uint32_t retval, *buf = ext2_lookup_inotable_slot(inode, ino_block);
retval = *buf;
ext2_put_metablock(inode->i_sb, buf);
return retval;
}
/* Returns an incref'd metadata block for the contents of the ino block. Don't
* use this for regular files - use their inode's page cache instead (used for
* directories for now). If there isn't a block allocated yet, it will provide
* a zeroed one. */
void *ext2_get_ino_metablock(struct inode *inode, unsigned long ino_block)
{
uint32_t blkid, *retval, *blk_slot;
blk_slot = ext2_lookup_inotable_slot(inode, ino_block);
blkid = le32_to_cpu(*blk_slot);
if (blkid) {
ext2_put_metablock(inode->i_sb, blk_slot);
return ext2_get_metablock(inode->i_sb, blkid);
}
/* If there isn't a block there, alloc and insert one. This block will be
* the next big chunk of "file" data for this inode. */
blkid = ext2_alloc_block(inode, ext2_bgidx2block(inode->i_sb,
ext2_inode2bg(inode),
0));
*blk_slot = cpu_to_le32(blkid);
ext2_dirty_metablock(inode->i_sb, blk_slot);
ext2_put_metablock(inode->i_sb, blk_slot);
inode->i_blocks += inode->i_sb->s_blocksize >> 9; /* inc by 1 FS block */
inode->i_size += inode->i_sb->s_blocksize;
retval = ext2_get_metablock(inode->i_sb, blkid);
memset(retval, 0, inode->i_sb->s_blocksize); /* 0 the new block */
return retval;
}
/* This should help with degubbing. In read_inode(), print out the i_block, and
* consider manually (via memory inspection) examining those blocks. Odds are,
* the 2x and 3x walks are jacked up. */
void ext2_print_ino_blocks(struct inode *inode)
{
printk("Inode %p, Size: %d, 512B 'blocks': %d\n-------------\n", inode,
inode->i_size, inode->i_blocks);
for (int i = 0; i < inode->i_blocks * (inode->i_sb->s_blocksize / 512); i++)
printk("# %03d, Block %03d\n", i, ext2_find_inoblock(inode, i));
}
/* Misc Functions */
/* This checks an ext2 disc SB for consistency, optionally printing out its
* stats. It also will also read in a copy of the block group descriptor table
* from its first location (right after the primary SB copy) */
void ext2_check_sb(struct ext2_sb *e2sb, struct ext2_block_group *bg,
bool print)
{
int retval;
unsigned int blksize, blks_per_group, num_blk_group, num_blks;
unsigned int inodes_per_grp, inode_size;
unsigned int sum_blks = 0, sum_inodes = 0;
assert(le16_to_cpu(e2sb->s_magic) == EXT2_SUPER_MAGIC);
num_blks = le32_to_cpu(e2sb->s_blocks_cnt);
blksize = 1024 << le32_to_cpu(e2sb->s_log_block_size);
blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
num_blk_group = num_blks / blks_per_group + (num_blks % blks_per_group ? 1 : 0);
if (print) {
printk("EXT2 info:\n-------------------------\n");
printk("Total Inodes: %8d\n", le32_to_cpu(e2sb->s_inodes_cnt));
printk("Total Blocks: %8d\n", le32_to_cpu(e2sb->s_blocks_cnt));
printk("Num R-Blocks: %8d\n", le32_to_cpu(e2sb->s_rblocks_cnt));
printk("Num Free Blocks: %8d\n", le32_to_cpu(e2sb->s_free_blocks_cnt));
printk("Num Free Inodes: %8d\n", le32_to_cpu(e2sb->s_free_inodes_cnt));
printk("First Data Block: %8d\n",
le32_to_cpu(e2sb->s_first_data_block));
printk("Block Size: %8d\n",
1024 << le32_to_cpu(e2sb->s_log_block_size));
printk("Fragment Size: %8d\n",
1024 << le32_to_cpu(e2sb->s_log_frag_size));
printk("Blocks per group: %8d\n",
le32_to_cpu(e2sb->s_blocks_per_group));
printk("Inodes per group: %8d\n",
le32_to_cpu(e2sb->s_inodes_per_group));
printk("Block groups: %8d\n", num_blk_group);
printk("Mount state: %8d\n", le16_to_cpu(e2sb->s_state));
printk("Rev Level: %8d\n", le32_to_cpu(e2sb->s_minor_rev_level));
printk("Minor Rev Level: %8d\n", le16_to_cpu(e2sb->s_minor_rev_level));
printk("Creator OS: %8d\n", le32_to_cpu(e2sb->s_creator_os));
printk("First Inode: %8d\n", le32_to_cpu(e2sb->s_first_ino));
printk("Inode size: %8d\n", le16_to_cpu(e2sb->s_inode_size));
printk("This block group: %8d\n", le16_to_cpu(e2sb->s_block_group_nr));
printk("BG ID of 1st meta:%8d\n", le16_to_cpu(e2sb->s_first_meta_bg));
printk("Volume name: %s\n", e2sb->s_volume_name);
printk("\nBlock Group Info:\n----------------------\n");
}
for (int i = 0; i < num_blk_group; i++) {
sum_blks += le16_to_cpu(bg[i].bg_free_blocks_cnt);
sum_inodes += le16_to_cpu(bg[i].bg_free_inodes_cnt);
if (print) {
printk("*** BG %d at %p\n", i, &bg[i]);
printk("Block bitmap:%8d\n", le32_to_cpu(bg[i].bg_block_bitmap));
printk("Inode bitmap:%8d\n", le32_to_cpu(bg[i].bg_inode_bitmap));
printk("Inode table: %8d\n", le32_to_cpu(bg[i].bg_inode_table));
printk("Free blocks: %8d\n", le16_to_cpu(bg[i].bg_free_blocks_cnt));
printk("Free inodes: %8d\n", le16_to_cpu(bg[i].bg_free_inodes_cnt));
printk("Used Dirs: %8d\n", le16_to_cpu(bg[i].bg_used_dirs_cnt));
}
}
/* Sanity Assertions. A good ext2 will always pass these. */
inodes_per_grp = le32_to_cpu(e2sb->s_inodes_per_group);
blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
inode_size = le32_to_cpu(e2sb->s_inode_size);
assert(le32_to_cpu(e2sb->s_inodes_cnt) <= inodes_per_grp * num_blk_group);
assert(le32_to_cpu(e2sb->s_free_inodes_cnt) == sum_inodes);
assert(le32_to_cpu(e2sb->s_blocks_cnt) <= blks_per_group * num_blk_group);
assert(le32_to_cpu(e2sb->s_free_blocks_cnt) == sum_blks);
if (blksize == 1024)
assert(le32_to_cpu(e2sb->s_first_data_block) == 1);
else
assert(le32_to_cpu(e2sb->s_first_data_block) == 0);
assert(inode_size <= blksize);
assert(inode_size == 1 << LOG2_UP(inode_size));
assert(blksize * 8 >= inodes_per_grp);
assert(inodes_per_grp % (blksize / inode_size) == 0);
if (print)
printk("Passed EXT2 Checks\n");
}
/* VFS required Misc Functions */
/* Creates the SB. Like with Ext2's, we should consider pulling out the
* FS-independent stuff, if possible. */
struct super_block *ext2_get_sb(struct fs_type *fs, int flags,
char *dev_name, struct vfsmount *vmnt)
{
struct block_device *bdev;
struct ext2_sb *e2sb;
struct ext2_block_group *e2bg;
unsigned int blks_per_group, num_blk_group, num_blks;
static bool ran_once = FALSE;
if (!ran_once) {
ran_once = TRUE;
ext2_init();
}
bdev = get_bdev(dev_name);
assert(bdev);
/* Read the SB. It's always at byte 1024 and 1024 bytes long. Note we do
* not put the metablock (we pin it off the sb later). Same with e2bg. */
e2sb = (struct ext2_sb*)__ext2_get_metablock(bdev, 1, 1024);
if (!(le16_to_cpu(e2sb->s_magic) == EXT2_SUPER_MAGIC)) {
warn("EXT2 Not detected when it was expected!");
return 0;
}
/* Read in the block group descriptor table. Which block the BG table is on
* depends on the blocksize */
unsigned int blksize = 1024 << le32_to_cpu(e2sb->s_log_block_size);
e2bg = __ext2_get_metablock(bdev, blksize == 1024 ? 2 : 1, blksize);
assert(e2bg);
ext2_check_sb(e2sb, e2bg, FALSE);
/* Now we build and init the VFS SB */
struct super_block *sb = get_sb();
sb->s_dev = 0; /* what do we really want here? */
sb->s_blocksize = blksize;
/* max file size for a 1024 blocksize FS. good enough for now (TODO) */
sb->s_maxbytes = 17247252480;
sb->s_type = &ext2_fs_type;
sb->s_op = &ext2_s_op;
sb->s_flags = flags; /* from the disc too? which flags are these? */
sb->s_magic = EXT2_SUPER_MAGIC;
sb->s_mount = vmnt; /* Kref? also in KFS */
sb->s_syncing = FALSE;
kref_get(&bdev->b_kref, 1);
sb->s_bdev = bdev;
strlcpy(sb->s_name, "EXT2", 32);
sb->s_fs_info = kmalloc(sizeof(struct ext2_sb_info), 0);
assert(sb->s_fs_info);
/* store the in-memory copy of the disk SB and bg desc table */
((struct ext2_sb_info*)sb->s_fs_info)->e2sb = e2sb;
((struct ext2_sb_info*)sb->s_fs_info)->e2bg = e2bg;
/* Precompute the number of BGs */
num_blks = le32_to_cpu(e2sb->s_blocks_cnt);
blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
((struct ext2_sb_info*)sb->s_fs_info)->nr_bgs = num_blks / blks_per_group +
(num_blks % blks_per_group ? 1 : 0);
/* Final stages of initializing the sb, mostly FS-independent */
init_sb(sb, vmnt, &ext2_d_op, EXT2_ROOT_INO, 0);
printk("EXT2 superblock loaded\n");
kref_put(&bdev->b_kref);
return sb;
}
void ext2_kill_sb(struct super_block *sb)
{
/* don't forget to kfree the s_fs_info and its two members */
panic("Killing an EXT2 SB is not supported!");
}
/* Every FS must have a static FS Type, with which the VFS code can bootstrap */
struct fs_type ext2_fs_type = {"EXT2", 0, ext2_get_sb, ext2_kill_sb, {0, 0},
TAILQ_HEAD_INITIALIZER(ext2_fs_type.fs_supers)};
/* Page Map Operations */
/* Sets up the bidirectional mapping between the page and its buffer heads. As
* a future optimization, we could try and detect if all of the blocks are
* contiguous (either before or after making them) and compact them to one BH.
* Note there is an assumption that the file has at least one block in it. */
int ext2_mappage(struct page_map *pm, struct page *page)
{
struct buffer_head *bh;
struct inode *inode = (struct inode*)pm->pm_host;
assert(!page->pg_private); /* double check that we aren't bh-mapped */
assert(inode->i_mapping == pm); /* double check we are the inode for pm */
struct block_device *bdev = inode->i_sb->s_bdev;
unsigned int blk_per_pg = PGSIZE / inode->i_sb->s_blocksize;
unsigned int sct_per_blk = inode->i_sb->s_blocksize / bdev->b_sector_sz;
uint32_t ino_blk_num, fs_blk_num = 0, *fs_blk_slot;
bh = kmem_cache_alloc(bh_kcache, 0);
page->pg_private = bh;
for (int i = 0; i < blk_per_pg; i++) {
/* free_bh() can handle having a halfway aborted mappage() */
if (!bh)
return -ENOMEM;
bh->bh_page = page; /* weak ref */
bh->bh_buffer = page2kva(page) + i * inode->i_sb->s_blocksize;
bh->bh_flags = 0; /* whatever... */
bh->bh_bdev = bdev; /* uncounted ref */
/* compute the first sector of the FS block for the ith buf in the pg */
ino_blk_num = page->pg_index * blk_per_pg + i;
fs_blk_slot = ext2_lookup_inotable_slot(inode, ino_blk_num);
/* If there isn't a block there, lets get one. The previous fs_blk_num
* is our hint (or we have to compute one). */
if (!*fs_blk_slot) {
if (!fs_blk_num) {
fs_blk_num = ext2_bgidx2block(inode->i_sb,
ext2_inode2bg(inode), 0);
}
fs_blk_num = ext2_alloc_block(inode, fs_blk_num + 1);
/* Link it, and dirty the inode indirect block */
*fs_blk_slot = cpu_to_le32(fs_blk_num);
ext2_dirty_metablock(inode->i_sb, fs_blk_slot);
/* the block is still on disk, and we don't want its contents */
bh->bh_flags = BH_NEEDS_ZEROED; /* talking to readpage */
/* update our num blocks, with 512B each "block" (ext2-style) */
inode->i_blocks += inode->i_sb->s_blocksize >> 9;
} else { /* there is a block there already */
fs_blk_num = *fs_blk_slot;
}
ext2_put_metablock(inode->i_sb, fs_blk_slot);
bh->bh_sector = fs_blk_num * sct_per_blk;
bh->bh_nr_sector = sct_per_blk;
/* Stop if we're the last block in the page. We could be going beyond
* the end of the file, in which case the next BHs will be zeroed. */
if (i == blk_per_pg - 1) {
bh->bh_next = 0;
break;
} else {
/* get and link to the next BH. */
bh->bh_next = kmem_cache_alloc(bh_kcache, 0);
bh = bh->bh_next;
}
}
return 0;
}
/* Fills page with its contents from its backing store file. Note that we do
* the zero padding here, instead of higher in the VFS. Might change in the
* future. TODO: make this a block FS generic call. */
int ext2_readpage(struct page_map *pm, struct page *page)
{
int retval;
struct block_device *bdev = pm->pm_host->i_sb->s_bdev;
struct buffer_head *bh;
struct block_request *breq;
void *eobh;
atomic_or(&page->pg_flags, PG_BUFFER);
retval = ext2_mappage(pm, page);
if (retval)
return retval;
/* Build and submit the request */
breq = kmem_cache_alloc(breq_kcache, 0);
if (!breq)
return -ENOMEM;
breq->flags = BREQ_READ;
breq->callback = generic_breq_done;
breq->data = 0;
sem_init_irqsave(&breq->sem, 0);
breq->bhs = breq->local_bhs;
breq->nr_bhs = 0;
/* Pack the BH pointers in the block request */
bh = (struct buffer_head*)page->pg_private;
assert(bh);
/* Either read the block in, or zero the buffer. If we wanted to ensure no
* data is leaked after a crash, we'd write a 0 block too. */
for (int i = 0; bh; bh = bh->bh_next) {
if (!(bh->bh_flags & BH_NEEDS_ZEROED)) {
breq->bhs[i] = bh;
breq->nr_bhs++;
i++;
} else {
memset(bh->bh_buffer, 0, pm->pm_host->i_sb->s_blocksize);
bh->bh_flags |= BH_DIRTY;
atomic_or(&bh->bh_page->pg_flags, PG_DIRTY);
}
}
retval = bdev_submit_request(bdev, breq);
assert(!retval);
sleep_on_breq(breq);
kmem_cache_free(breq_kcache, breq);
/* zero out whatever is beyond the EOF. we could do this by figuring out
* where the BHs end and zeroing from there, but I'd rather zero from where
* the file ends (which could be in the middle of an FS block */
uintptr_t eof_off;
eof_off = (pm->pm_host->i_size - page->pg_index * PGSIZE);
eof_off = MIN(eof_off, PGSIZE) % PGSIZE;
/* at this point, eof_off is the offset into the page of the EOF, or 0 */
if (eof_off)
memset(eof_off + page2kva(page), 0, PGSIZE - eof_off);
/* Now the page is up to date */
atomic_or(&page->pg_flags, PG_UPTODATE);
/* Useful debugging. Put one higher up if the page is not getting mapped */
//print_pageinfo(page);
return 0;
}
int ext2_writepage(struct page_map *pm, struct page *page)
{
return -1;
}
/* Super Operations */
/* Creates and initializes a new inode. FS specific, yet inode-generic fields
* are filled in. inode-specific fields are filled in in read_inode() based on
* what's on the disk for a given i_no. i_no and i_fop are set by the caller.
*
* Note that this means this inode can be for an inode that is already on disk,
* or it can be used when creating. The i_fop depends on the type of file
* (file, directory, symlink, etc). */
struct inode *ext2_alloc_inode(struct super_block *sb)
{
struct inode *inode = kmem_cache_alloc(inode_kcache, 0);
memset(inode, 0, sizeof(struct inode));
inode->i_op = &ext2_i_op;
inode->i_pm.pm_op = &ext2_pm_op;
return inode;
}
/* FS-specific clean up when an inode is dealloced. this is just cleaning up
* the in-memory version, and only the FS-specific parts. whether or not the
* inode is still on disc is irrelevant. */
void ext2_dealloc_inode(struct inode *inode)
{
kmem_cache_free(ext2_i_kcache, inode->i_fs_info);
}
/* Returns a pointer within a metablock for the disk inode specified by inode.
* Be sure to 'put' your reference (and/or dirty it). */
struct ext2_inode *ext2_get_diskinode(struct inode *inode)
{
uint32_t my_bg_idx, ino_per_blk, my_ino_blk;
struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
struct ext2_block_group *my_bg;
struct ext2_inode *ino_tbl_chunk;
assert(inode->i_ino); /* ino == 0 is a bug */
/* Need to compute the blockgroup and index of the requested inode */
ino_per_blk = inode->i_sb->s_blocksize /
le16_to_cpu(e2sbi->e2sb->s_inode_size);
my_bg_idx = ext2_inode2bgidx(inode);
my_bg = ext2_inode2bg(inode);
/* Figure out which FS block of the inode table we want and read in that
* chunk */
my_ino_blk = le32_to_cpu(my_bg->bg_inode_table) + my_bg_idx / ino_per_blk;
ino_tbl_chunk = ext2_get_metablock(inode->i_sb, my_ino_blk);
return &ino_tbl_chunk[my_bg_idx % ino_per_blk];
}
/* reads the inode data on disk specified by inode->i_ino into the inode.
* basically, it's a "make this inode the one for i_ino (i number)" */
void ext2_read_inode(struct inode *inode)
{
struct ext2_inode *my_ino;
my_ino = ext2_get_diskinode(inode);
/* Have the disk inode now, let's put its info into the VFS inode: */
inode->i_mode = le16_to_cpu(my_ino->i_mode);
switch (inode->i_mode & __S_IFMT) {
case (__S_IFDIR):
inode->i_fop = &ext2_f_op_dir;
break;
case (__S_IFREG):
inode->i_fop = &ext2_f_op_file;
break;
case (__S_IFLNK):
inode->i_fop = &ext2_f_op_sym;
break;
case (__S_IFCHR):
case (__S_IFBLK):
default:
inode->i_fop = &ext2_f_op_file;
warn("[Calm British Accent] Look around you. Unhandled filetype.");
}
inode->i_nlink = le16_to_cpu(my_ino->i_links_cnt);
inode->i_uid = le16_to_cpu(my_ino->i_uid);
inode->i_gid = le16_to_cpu(my_ino->i_gid);
/* technically, for large F_REG, we should | with i_dir_acl */
inode->i_size = le32_to_cpu(my_ino->i_size);
inode->i_atime.tv_sec = le32_to_cpu(my_ino->i_atime);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_sec = le32_to_cpu(my_ino->i_mtime);
inode->i_mtime.tv_nsec = 0;
inode->i_ctime.tv_sec = le32_to_cpu(my_ino->i_ctime);
inode->i_ctime.tv_nsec = 0;
inode->i_blocks = le32_to_cpu(my_ino->i_blocks);
inode->i_flags = le32_to_cpu(my_ino->i_flags);
inode->i_socket = FALSE; /* for now */
/* Copy over the other inode stuff that isn't in the VFS inode. For now,
* it's just the block pointers */
inode->i_fs_info = kmem_cache_alloc(ext2_i_kcache, 0);
struct ext2_i_info *e2ii = (struct ext2_i_info*)inode->i_fs_info;
for (int i = 0; i < 15; i++)
e2ii->i_block[i] = le32_to_cpu(my_ino->i_block[i]);
/* TODO: (HASH) unused: inode->i_hash add to hash (saves on disc reading) */
/* TODO: we could consider saving a pointer to the disk inode and pinning
* its buffer in memory, but for now we'll just free it. */
ext2_put_metablock(inode->i_sb, my_ino);
}
/* called when an inode in memory is modified (journalling FS's care) */
void ext2_dirty_inode(struct inode *inode)
{
// presumably we'll ext2_dirty_metablock(void *buffer) here
}
/* write the inode to disk (specifically, to inode inode->i_ino), synchronously
* if we're asked to wait */
void ext2_write_inode(struct inode *inode, bool wait)
{
I_AM_HERE;
}
/* called when an inode is decref'd, to do any FS specific work */
void ext2_put_inode(struct inode *inode)
{
I_AM_HERE;
}
/* Unused for now, will get rid of this if inode_release is sufficient */
void ext2_drop_inode(struct inode *inode)
{
I_AM_HERE;
}
/* delete the inode from disk (all data) */
void ext2_delete_inode(struct inode *inode)
{
I_AM_HERE;
// would remove from "disk" here
/* TODO: give up our i_ino */
}
/* unmount and release the super block */
void ext2_put_super(struct super_block *sb)
{
panic("Shazbot! Ext2 can't be unmounted yet!");
}
/* updates the on-disk SB with the in-memory SB */
void ext2_write_super(struct super_block *sb)
{
I_AM_HERE;
}
/* syncs FS metadata with the disc, synchronously if we're waiting. this info
* also includes anything pointed to by s_fs_info. */
int ext2_sync_fs(struct super_block *sb, bool wait)
{
I_AM_HERE;
return 0;
}
/* remount the FS with the new flags */
int ext2_remount_fs(struct super_block *sb, int flags, char *data)
{
warn("Ext2 will not remount.");
return -1; // can't remount
}
/* interrupts a mount operation - used by NFS and friends */
void ext2_umount_begin(struct super_block *sb)
{
panic("Cannot abort a Ext2 mount, and why would you?");
}
/* inode_operations */
/* Little helper, used for initializing new inodes for file-like objects (files,
* symlinks, etc). We pass the dentry, since we need to up it. */
static void ext2_init_inode(struct inode *dir, struct dentry *dentry)
{
#if 0
struct inode *inode = dentry->d_inode;
inode->i_ino = ext2_get_free_ino();
#endif
}
/* Initializes a new/empty disk inode, according to inode. If you end up not
* zeroing this stuff, be careful of endianness. */
static void ext2_init_diskinode(struct ext2_inode *e2i, struct inode *inode)
{
assert(inode->i_size == 0);
e2i->i_mode = cpu_to_le16(inode->i_mode);
e2i->i_uid = cpu_to_le16(inode->i_uid);
e2i->i_size = 0;
e2i->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
e2i->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
e2i->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
e2i->i_dtime = 0;
e2i->i_gid = cpu_to_le16(inode->i_gid);
e2i->i_links_cnt = cpu_to_le16(inode->i_nlink);
e2i->i_blocks = 0;
e2i->i_flags = cpu_to_le32(inode->i_flags);
e2i->i_osd1 = 0;
e2i->i_generation = 0;
e2i->i_file_acl = 0;
e2i->i_dir_acl = 0;
e2i->i_faddr = 0;
for (int i = 0; i < 15; i++)
e2i->i_block[i] = 0;
for (int i = 0; i < 12; i++)
e2i->i_osd2[i] = 0;
}
/* These should return true if foreach_dirent should stop working on the
* dirents. */
typedef bool (*each_func_t) (struct ext2_dirent *dir_i, long a1, long a2,
long a3);
/* Loads the buffer and performs my_work on each dirent, stopping and returning
* 0 if one of the calls succeeded, or returning the dir block num of what would
* be the next dir block otherwise (aka, how many blocks we went through). */
static uint32_t ext2_foreach_dirent(struct inode *dir, each_func_t my_work,
long a1, long a2, long a3)
{
struct ext2_dirent *dir_buf, *dir_i;
uint32_t dir_block = 0;
dir_buf = ext2_get_ino_metablock(dir, dir_block++);
dir_i = dir_buf;
/* now we have the first block worth of dirents. We'll get another block if
* dir_i hits a block boundary */
for (unsigned int bytes = 0; bytes < dir->i_size; ) {
/* On subsequent loops, we might need to advance to the next block.
* This is where a file abstraction for a dir might be easier. */
if ((void*)dir_i >= (void*)dir_buf + dir->i_sb->s_blocksize) {
ext2_put_metablock(dir->i_sb, dir_buf);
dir_buf = ext2_get_ino_metablock(dir, dir_block++);
dir_i = dir_buf;
assert(dir_buf);
}
if (my_work(dir_i, a1, a2, a3)) {
ext2_put_metablock(dir->i_sb, dir_buf);
return 0;
}
/* Get ready for the next loop */
bytes += dir_i->dir_reclen;
dir_i = (void*)dir_i + dir_i->dir_reclen;
}
ext2_put_metablock(dir->i_sb, dir_buf);
return dir_block;
}
/* Returns the actual length of a dirent, not just how far to the next entry.
* If there is no inode, the entry is unused, and it has no length (as far as
* users of this should care). */
static unsigned int ext2_dirent_len(struct ext2_dirent *e2dir)
{
/* arguably, we don't need the le32_to_cpu */
if (le32_to_cpu(e2dir->dir_inode))
return ROUNDUP(e2dir->dir_namelen + 8, 4); /* no such le8_to_cpu */
else
return 0;
}
/* Helper for writing the contents of a dentry to a disk dirent. Zeroes the
* contents of the dirent so that we don't write random data to disk. */
static void ext2_write_dirent(struct ext2_dirent *e2dir, struct dentry *dentry,
unsigned int rec_len)
{
memset(e2dir, 0, sizeof(*e2dir));
e2dir->dir_inode = cpu_to_le32(dentry->d_inode->i_ino);
e2dir->dir_reclen = cpu_to_le16(rec_len);
e2dir->dir_namelen = dentry->d_name.len;
switch (dentry->d_inode->i_mode & __S_IFMT) {
case (__S_IFDIR):
e2dir->dir_filetype = EXT2_FT_DIR;
break;
case (__S_IFREG):
e2dir->dir_filetype = EXT2_FT_REG_FILE;
break;
case (__S_IFLNK):
e2dir->dir_filetype = EXT2_FT_SYMLINK;
break;
case (__S_IFCHR):
e2dir->dir_filetype = EXT2_FT_CHRDEV;
break;
case (__S_IFBLK):
e2dir->dir_filetype = EXT2_FT_BLKDEV;
break;
case (__S_IFSOCK):
e2dir->dir_filetype = EXT2_FT_SOCK;
break;
default:
warn("[Calm British Accent] Look around you: Unknown filetype.");
e2dir->dir_filetype = EXT2_FT_UNKNOWN;
}
assert(dentry->d_name.len <= 255);
strlcpy((char*)e2dir->dir_name, dentry->d_name.name,
sizeof(e2dir->dir_name));
}
/* Helper for ext2_create(). This tries to squeeze a dirent in the slack space
* after an existing dirent, returning TRUE if it succeeded (to break out). */
static bool create_each_func(struct ext2_dirent *dir_i, long a1, long a2,
long a3)
{
struct dentry *dentry = (struct dentry*)a1;
unsigned int our_rec_len = (unsigned int)a2;
unsigned int mode = (unsigned int)a3;
struct ext2_dirent *dir_new;
unsigned int real_len = ext2_dirent_len(dir_i);
/* How much room is available after this dir_i before the next one */
unsigned int record_slack = le16_to_cpu(dir_i->dir_reclen) - real_len;
/* TODO: Note that this technique will clobber any directory indexing. They
* exist after the .. entry with an inode of 0. Check the docs for
* specifics and think up a nice way to tell the diff between a reserved
* entry and an unused one, when inode == 0. */
if (record_slack < our_rec_len)
return FALSE;
/* At this point, there is enough room for us. Stick our new one in right
* after the real len, making sure our reclen goes to the old end. Note
* that it is possible to have a real_len of 0 (an unused entry). In this
* case, we just end up taking over the spot in the dir_blk. Be sure to set
* dir_i's reclen before dir_new's (in case they are the same). */
dir_new = ((void*)dir_i + real_len);
dir_i->dir_reclen = cpu_to_le16(real_len);
ext2_write_dirent(dir_new, dentry, record_slack);
ext2_dirty_metablock(dentry->d_sb, dir_new);
return TRUE;
}
/* Called when creating a new disk inode in dir associated with dentry. We need
* to fill out the i_ino, set the type, and do whatever else we need */
int ext2_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct ext2_block_group *dir_bg = ext2_inode2bg(dir);
struct ext2_inode *disk_inode;
struct ext2_i_info *e2ii;
uint32_t dir_block;
unsigned int our_rec_len;
struct ext2_dirent *new_dirent;
/* Set basic inode stuff for files, get a disk inode, etc */
SET_FTYPE(inode->i_mode, __S_IFREG);
inode->i_fop = &ext2_f_op_file;
inode->i_ino = ext2_alloc_diskinode(inode, dir_bg);
/* Initialize disk inode (this will be different for short symlinks) */
disk_inode = ext2_get_diskinode(inode);
ext2_init_diskinode(disk_inode, inode);
/* Initialize the e2ii (might get rid of this cache of block info) */
inode->i_fs_info = kmem_cache_alloc(ext2_i_kcache, 0);
e2ii = (struct ext2_i_info*)inode->i_fs_info;
for (int i = 0; i < 15; i++)
e2ii->i_block[i] = le32_to_cpu(disk_inode->i_block[i]);
/* Dirty and put the disk inode */
ext2_dirty_metablock(dentry->d_sb, disk_inode);
ext2_put_metablock(dentry->d_sb, disk_inode);
/* Insert it in the directory (make a dirent, might expand the dir too) */
/* Note the disk dir_name is not null terminated */
our_rec_len = ROUNDUP(8 + dentry->d_name.len, 4);
assert(our_rec_len <= 8 + 256);
/* Consider caching the start point for future dirent ops. Or even using
* the indexed directory.... */
dir_block = ext2_foreach_dirent(dir, create_each_func, (long)dentry,
(long)our_rec_len, (long)mode);
/* If this returned a block number, we didn't find room in any of the
* existing directory blocks, so we need to make a new one, stick it in the
* dir inode, and stick our dirent at the beginning. The reclen is the
* whole blocksize (since it's the last entry in this block) */
if (dir_block) {
new_dirent = ext2_get_ino_metablock(dir, dir_block);
ext2_write_dirent(new_dirent, dentry, dentry->d_sb->s_blocksize);
ext2_dirty_metablock(dentry->d_sb, new_dirent);
ext2_put_metablock(dentry->d_sb, new_dirent);
}
return 0;
}
/* If we match, this loads the inode for the dentry and returns true (so we
* break out) */
static bool lookup_each_func(struct ext2_dirent *dir_i, long a1, long a2,
long a3)
{
struct dentry *dentry = (struct dentry*)a1;
/* Test if we're the one (TODO: use d_compare). Note, dir_name is not
* null terminated, hence the && test. */
if (!strncmp((char*)dir_i->dir_name, dentry->d_name.name,
dir_i->dir_namelen) &&
(dentry->d_name.name[dir_i->dir_namelen] == '\0')) {
load_inode(dentry, (long)le32_to_cpu(dir_i->dir_inode));
/* TODO: (HASH) add dentry to dcache (maybe the caller should) */
return TRUE;
}
return FALSE;
}
/* Searches the directory for the filename in the dentry, filling in the dentry
* with the FS specific info of this file. If it succeeds, it will pass back
* the *dentry you should use (which might be the same as the one you passed in).
* If this fails, it will return 0, but not free the memory of "dentry."
*
* Callers, make sure you alloc and fill out the name parts of the dentry. We
* don't currently use the ND. Might remove it in the future. */
struct dentry *ext2_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
assert(S_ISDIR(dir->i_mode));
struct ext2_dirent *dir_buf, *dir_i;
if (!ext2_foreach_dirent(dir, lookup_each_func, (long)dentry, 0, 0))
return dentry;
printd("EXT2: Not Found, %s\n", dentry->d_name.name);
return 0;
}
/* Hard link to old_dentry in directory dir with a name specified by new_dentry.
* At the very least, set the new_dentry's FS-specific fields. */
int ext2_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
{
I_AM_HERE;
assert(new_dentry->d_op = &ext2_d_op);
return 0;
}
/* Removes the link from the dentry in the directory */
int ext2_unlink(struct inode *dir, struct dentry *dentry)
{
I_AM_HERE;
return 0;
}
/* Creates a new inode for a symlink dir, linking to / containing the name
* symname. dentry is the controlling dentry of the inode. */
int ext2_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
I_AM_HERE;
#if 0
struct inode *inode = dentry->d_inode;
SET_FTYPE(inode->i_mode, __S_IFLNK);
inode->i_fop = &ext2_f_op_sym;
strlcpy(string, symname, len + 1);
#endif
return 0;
}
/* Called when creating a new inode for a directory associated with dentry in
* dir with the given mode. Note, we might (later) need to track subdirs within
* the parent inode, like we do with regular files. I'd rather not, so we'll
* see if we need it. */
int ext2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
I_AM_HERE;
#if 0
struct inode *inode = dentry->d_inode;
inode->i_ino = ext2_get_free_ino();
SET_FTYPE(inode->i_mode, __S_IFDIR);
inode->i_fop = &ext2_f_op_dir;
#endif
return 0;
}
/* Removes from dir the directory 'dentry.' Ext2 doesn't store anything in the
* inode for which children it has. It probably should, but since everything is
* pinned, it just relies on the dentry connections. */
int ext2_rmdir(struct inode *dir, struct dentry *dentry)
{
I_AM_HERE;
return 0;
}
/* Used to make a generic file, based on the type and the major/minor numbers
* (in rdev), with the given mode. As with others, this creates a new disk
* inode for the file */
int ext2_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
{
I_AM_HERE;
return -1;
}
/* Moves old_dentry from old_dir to new_dentry in new_dir */
int ext2_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
I_AM_HERE;
return -1;
}
/* Returns the char* for the symname for the given dentry. The VFS code that
* calls this for real FS's might assume it's already read in, so if the char *
* isn't already in memory, we'd need to read it in here. Regarding the char*
* storage, the char* only will last as long as the dentry and inode are in
* memory. */
char *ext2_readlink(struct dentry *dentry)
{
I_AM_HERE;
struct inode *inode = dentry->d_inode;
if (!S_ISLNK(inode->i_mode))
return 0;
return 0;
}
/* Modifies the size of the file of inode to whatever its i_size is set to */
void ext2_truncate(struct inode *inode)
{
}
/* Checks whether the the access mode is allowed for the file belonging to the
* inode. Implies that the permissions are on the file, and not the hardlink */
int ext2_permission(struct inode *inode, int mode, struct nameidata *nd)
{
return -1;
}
/* dentry_operations */
/* Determines if the dentry is still valid before using it to translate a path.
* Network FS's need to deal with this. */
int ext2_d_revalidate(struct dentry *dir, struct nameidata *nd)
{ // default, nothing
return -1;
}
/* Compares name1 and name2. name1 should be a member of dir. */
int ext2_d_compare(struct dentry *dir, struct qstr *name1, struct qstr *name2)
{ // default, string comp (case sensitive)
return -1;
}
/* Called when the last ref is deleted (refcnt == 0) */
int ext2_d_delete(struct dentry *dentry)
{ // default, nothin
return -1;
}
/* Called when it's about to be slab-freed */
int ext2_d_release(struct dentry *dentry)
{
return -1;
}
/* Called when the dentry loses it's inode (becomes "negative") */
void ext2_d_iput(struct dentry *dentry, struct inode *inode)
{ // default, call i_put to release the inode object
}
/* file_operations */
/* Updates the file pointer. TODO: think about locking, and putting this in the
* VFS. */
#include <syscall.h> /* just for set_errno, may go away later */
int ext2_llseek(struct file *file, off64_t offset, off64_t *ret, int whence)
{
off64_t temp_off = 0;
switch (whence) {
case SEEK_SET:
temp_off = offset;
break;
case SEEK_CUR:
temp_off = file->f_pos + offset;
break;
case SEEK_END:
temp_off = file->f_dentry->d_inode->i_size + offset;
break;
default:
set_errno(EINVAL);
warn("Unknown 'whence' in llseek()!\n");
return -1;
}
file->f_pos = temp_off;
*ret = temp_off;
return 0;
}
/* Fills in the next directory entry (dirent), starting with d_off. Like with
* read and write, there will be issues with userspace and the *dirent buf.
* TODO: (UMEM) */
int ext2_readdir(struct file *dir, struct dirent *dirent)
{
void *blk_buf;
/* Not enough data at the end of the directory */
if (dir->f_dentry->d_inode->i_size < dirent->d_off + 8)
return -ENOENT;
/* Figure out which block we need to read in for dirent->d_off */
int block = dirent->d_off / dir->f_dentry->d_sb->s_blocksize;
blk_buf = ext2_get_ino_metablock(dir->f_dentry->d_inode, block);
assert(blk_buf);
off64_t f_off = dirent->d_off % dir->f_dentry->d_sb->s_blocksize;
/* Copy out the dirent info */
struct ext2_dirent *e2dir = (struct ext2_dirent*)(blk_buf + f_off);
dirent->d_ino = le32_to_cpu(e2dir->dir_inode);
dirent->d_off += le16_to_cpu(e2dir->dir_reclen);
if (dir->f_dentry->d_inode->i_size < dirent->d_off)
panic("Something is jacked with the dirent going beyond the dir/file");
/* note, dir_namelen doesn't include the \0 */
dirent->d_reclen = e2dir->dir_namelen;
assert(e2dir->dir_namelen <= MAX_FILENAME_SZ);
strlcpy(dirent->d_name, (char*)e2dir->dir_name, e2dir->dir_namelen + 1);
ext2_put_metablock(dir->f_dentry->d_sb, blk_buf);
/* At the end of the directory, sort of. ext2 often preallocates blocks, so
* this will cause us to walk along til the end, which isn't quite right. */
if (dir->f_dentry->d_inode->i_size == dirent->d_off)
return 0;
if (dir->f_dentry->d_inode->i_size < dirent->d_off) {
warn("Issues reaching the end of an ext2 directory!");
return 0;
}
return 1; /* normal success for readdir */
}
/* This is called when a VMR is mapping a particular file. The FS needs to do
* whatever it needs so that faults can be handled by read_page(), and handle all
* of the cases of MAP_SHARED, MAP_PRIVATE, whatever. It also needs to ensure
* the file is not being mmaped in a way that conflicts with the manner in which
* the file was opened or the file type. */
int ext2_mmap(struct file *file, struct vm_region *vmr)
{
if (S_ISREG(file->f_dentry->d_inode->i_mode))
return 0;
return -1;
}
/* Called by the VFS while opening the file, which corresponds to inode, for
* the FS to do whatever it needs. */
int ext2_open(struct inode *inode, struct file *file)
{
/* TODO: check to make sure the file is openable, and maybe do some checks
* for the open mode (like did we want to truncate, append, etc) */
return 0;
}
/* Called when a file descriptor is closed. */
int ext2_flush(struct file *file)
{
I_AM_HERE;
return -1;
}
/* Called when the file is about to be closed (file obj freed) */
int ext2_release(struct inode *inode, struct file *file)
{
return 0;
}
/* Flushes the file's dirty contents to disc */
int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
{
return -1;
}
/* Traditionally, sleeps until there is file activity. We probably won't
* support this, or we'll handle it differently. */
unsigned int ext2_poll(struct file *file, struct poll_table_struct *poll_table)
{
return -1;
}
/* Reads count bytes from a file, starting from (and modifiying) offset, and
* putting the bytes into buffers described by vector */
ssize_t ext2_readv(struct file *file, const struct iovec *vector,
unsigned long count, off64_t *offset)
{
return -1;
}
/* Writes count bytes to a file, starting from (and modifiying) offset, and
* taking the bytes from buffers described by vector */
ssize_t ext2_writev(struct file *file, const struct iovec *vector,
unsigned long count, off64_t *offset)
{
return -1;
}
/* Write the contents of file to the page. Will sort the params later */
ssize_t ext2_sendpage(struct file *file, struct page *page, int offset,
size_t size, off64_t pos, int more)
{
return -1;
}
/* Checks random FS flags. Used by NFS. */
int ext2_check_flags(int flags)
{ // default, nothing
return -1;
}
/* Redeclaration and initialization of the FS ops structures */
struct page_map_operations ext2_pm_op = {
ext2_readpage,
ext2_writepage,
};
struct super_operations ext2_s_op = {
ext2_alloc_inode,
ext2_dealloc_inode,
ext2_read_inode,
ext2_dirty_inode,
ext2_write_inode,
ext2_put_inode,
ext2_drop_inode,
ext2_delete_inode,
ext2_put_super,
ext2_write_super,
ext2_sync_fs,
ext2_remount_fs,
ext2_umount_begin,
};
struct inode_operations ext2_i_op = {
ext2_create,
ext2_lookup,
ext2_link,
ext2_unlink,
ext2_symlink,
ext2_mkdir,
ext2_rmdir,
ext2_mknod,
ext2_rename,
ext2_readlink,
ext2_truncate,
ext2_permission,
};
struct dentry_operations ext2_d_op = {
ext2_d_revalidate,
generic_dentry_hash,
ext2_d_compare,
ext2_d_delete,
ext2_d_release,
ext2_d_iput,
};
struct file_operations ext2_f_op_file = {
ext2_llseek,
generic_file_read,
generic_file_write,
ext2_readdir,
ext2_mmap,
ext2_open,
ext2_flush,
ext2_release,
ext2_fsync,
ext2_poll,
ext2_readv,
ext2_writev,
ext2_sendpage,
ext2_check_flags,
};
struct file_operations ext2_f_op_dir = {
ext2_llseek,
generic_dir_read,
0,
ext2_readdir,
ext2_mmap,
ext2_open,
ext2_flush,
ext2_release,
ext2_fsync,
ext2_poll,
ext2_readv,
ext2_writev,
ext2_sendpage,
ext2_check_flags,
};
struct file_operations ext2_f_op_sym = {
ext2_llseek,
generic_file_read,
generic_file_write,
ext2_readdir,
ext2_mmap,
ext2_open,
ext2_flush,
ext2_release,
ext2_fsync,
ext2_poll,
ext2_readv,
ext2_writev,
ext2_sendpage,
ext2_check_flags,
};