| /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved. | 
 |  * Portions Copyright © 1997-1999 Vita Nuova Limited | 
 |  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited | 
 |  *                                (www.vitanuova.com) | 
 |  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others | 
 |  * | 
 |  * Modified for the Akaros operating system: | 
 |  * Copyright (c) 2013-2014 The Regents of the University of California | 
 |  * Copyright (c) 2013-2015 Google Inc. | 
 |  * | 
 |  * Permission is hereby granted, free of charge, to any person obtaining a copy | 
 |  * of this software and associated documentation files (the "Software"), to deal | 
 |  * in the Software without restriction, including without limitation the rights | 
 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
 |  * copies of the Software, and to permit persons to whom the Software is | 
 |  * furnished to do so, subject to the following conditions: | 
 |  * | 
 |  * The above copyright notice and this permission notice shall be included in | 
 |  * all copies or substantial portions of the Software. | 
 |  * | 
 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE | 
 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
 |  * SOFTWARE. */ | 
 |  | 
 | #include <slab.h> | 
 | #include <kmalloc.h> | 
 | #include <kref.h> | 
 | #include <string.h> | 
 | #include <stdio.h> | 
 | #include <assert.h> | 
 | #include <error.h> | 
 | #include <cpio.h> | 
 | #include <pmap.h> | 
 | #include <smp.h> | 
 | #include <net/ip.h> | 
 | #include <process.h> | 
 |  | 
 | /* Note that Hdrspc is only available via padblock (to the 'left' of the rp). */ | 
 | enum { | 
 | 	Hdrspc = 128,		/* leave room for high-level headers */ | 
 | 	Bdead = 0x51494F42,	/* "QIOB" */ | 
 | 	BLOCKALIGN = 32,	/* was the old BY2V in inferno, which was 8 */ | 
 | }; | 
 |  | 
 | /* | 
 |  *  allocate blocks (round data base address to 64 bit boundary). | 
 |  *  if mallocz gives us more than we asked for, leave room at the front | 
 |  *  for header. | 
 |  */ | 
 | struct block *block_alloc(size_t size, int mem_flags) | 
 | { | 
 | 	struct block *b; | 
 | 	uintptr_t addr; | 
 | 	int n; | 
 |  | 
 | 	/* If Hdrspc is not block aligned it will cause issues. */ | 
 | 	static_assert(Hdrspc % BLOCKALIGN == 0); | 
 |  | 
 | 	b = kmalloc(sizeof(struct block) + size + Hdrspc + (BLOCKALIGN - 1), | 
 | 				mem_flags); | 
 | 	if (b == NULL) | 
 | 		return NULL; | 
 |  | 
 | 	b->next = NULL; | 
 | 	b->list = NULL; | 
 | 	b->free = NULL; | 
 | 	b->flag = 0; | 
 | 	b->extra_len = 0; | 
 | 	b->nr_extra_bufs = 0; | 
 | 	b->extra_data = 0; | 
 | 	b->mss = 0; | 
 | 	b->network_offset = 0; | 
 | 	b->transport_offset = 0; | 
 |  | 
 | 	addr = (uintptr_t) b; | 
 | 	addr = ROUNDUP(addr + sizeof(struct block), BLOCKALIGN); | 
 | 	b->base = (uint8_t *) addr; | 
 | 	/* TODO: support this */ | 
 | 	/* interesting. We can ask the allocator, after allocating, | 
 | 	 * the *real* size of the block we got. Very nice. | 
 | 	 * Not on akaros yet. | 
 | 	 b->lim = ((uint8_t*)b) + msize(b); | 
 | 	 * See use of n in commented code below | 
 | 	 */ | 
 | 	b->lim = ((uint8_t *) b) + sizeof(struct block) + size + Hdrspc + | 
 | 		(BLOCKALIGN - 1); | 
 | 	b->rp = b->base; | 
 | 	/* TODO: support this */ | 
 | 	/* n is supposed to be Hdrspc + rear padding + extra reserved memory, | 
 | 	 * but since we don't currently support checking how much memory was | 
 | 	 * actually reserved, this is always Hdrspc + rear padding. After | 
 | 	 * rounding that down to BLOCKALIGN, it's always Hdrpsc since the | 
 | 	 * padding is < BLOCKALIGN. | 
 | 	 n = b->lim - b->base - size; | 
 | 	 b->rp += n & ~(BLOCKALIGN - 1); | 
 | 	 */ | 
 | 	b->rp += Hdrspc; | 
 | 	b->wp = b->rp; | 
 | 	/* b->base is aligned, rounded up from b | 
 | 	 * b->lim is the upper bound on our malloc | 
 | 	 * b->rp is advanced by some aligned amount, based on how much extra we | 
 | 	 * received from kmalloc and the Hdrspc. */ | 
 | 	return b; | 
 | } | 
 |  | 
 | /* Makes sure b has nr_bufs extra_data.  Will grow, but not shrink, an existing | 
 |  * extra_data array.  When growing, it'll copy over the old entries.  All new | 
 |  * entries will be zeroed.  mem_flags determines if we'll block on kmallocs. | 
 |  * | 
 |  * Return 0 on success or -1 on error. | 
 |  * Caller is responsible for concurrent access to the block's metadata. */ | 
 | int block_add_extd(struct block *b, unsigned int nr_bufs, int mem_flags) | 
 | { | 
 | 	unsigned int old_nr_bufs = b->nr_extra_bufs; | 
 | 	size_t old_amt = sizeof(struct extra_bdata) * old_nr_bufs; | 
 | 	size_t new_amt = sizeof(struct extra_bdata) * nr_bufs; | 
 | 	void *new_bdata; | 
 |  | 
 | 	if (old_nr_bufs >= nr_bufs) | 
 | 		return 0; | 
 | 	if (b->extra_data) { | 
 | 		new_bdata = krealloc(b->extra_data, new_amt, mem_flags); | 
 | 		if (!new_bdata) | 
 | 			return -1; | 
 | 		memset(new_bdata + old_amt, 0, new_amt - old_amt); | 
 | 	} else { | 
 | 		new_bdata = kzmalloc(new_amt, mem_flags); | 
 | 		if (!new_bdata) | 
 | 			return - 1; | 
 | 	} | 
 | 	b->extra_data = new_bdata; | 
 | 	b->nr_extra_bufs = nr_bufs; | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* Go backwards from the end of the list, remember the last unused slot, and | 
 |  * stop when a used slot is encountered. */ | 
 | static struct extra_bdata *next_unused_slot(struct block *b) | 
 | { | 
 | 	struct extra_bdata *ebd = NULL; | 
 |  | 
 | 	for (int i = b->nr_extra_bufs - 1; i >= 0; i--) { | 
 | 		if (b->extra_data[i].base) | 
 | 			break; | 
 | 		ebd = &b->extra_data[i]; | 
 | 	} | 
 | 	return ebd; | 
 | } | 
 |  | 
 | /* Append an extra data buffer @base with offset @off of length @len to block | 
 |  * @b.  Reuse an unused extra data slot if there's any. | 
 |  * Return 0 on success or -1 on error. */ | 
 | int block_append_extra(struct block *b, uintptr_t base, uint32_t off, | 
 |                        uint32_t len, int mem_flags) | 
 | { | 
 | 	unsigned int nr_bufs = b->nr_extra_bufs + 1; | 
 | 	struct extra_bdata *ebd; | 
 |  | 
 | 	ebd = next_unused_slot(b); | 
 | 	if (!ebd) { | 
 | 		if (block_add_extd(b, nr_bufs, mem_flags) != 0) | 
 | 			return -1; | 
 | 		ebd = next_unused_slot(b); | 
 | 		assert(ebd); | 
 | 	} | 
 | 	ebd->base = base; | 
 | 	ebd->off = off; | 
 | 	ebd->len = len; | 
 | 	b->extra_len += ebd->len; | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* There's metadata in each block related to the data payload.  For instance, | 
 |  * the TSO mss, the offsets to various headers, whether csums are needed, etc. | 
 |  * When you create a new block, like in copyblock, this will copy those bits | 
 |  * over. */ | 
 | void block_copy_metadata(struct block *new_b, struct block *old_b) | 
 | { | 
 | 	new_b->flag |= (old_b->flag & BLOCK_META_FLAGS); | 
 | 	new_b->tx_csum_offset = old_b->tx_csum_offset; | 
 | 	new_b->mss = old_b->mss; | 
 | 	new_b->network_offset = old_b->network_offset; | 
 | 	new_b->transport_offset = old_b->transport_offset; | 
 | 	new_b->free = old_b->free; | 
 |  | 
 | 	/* This is probably OK.  Right now, no one calls us with a blocklist. | 
 | 	 * Any callers that do would need to manage 'next', either to avoid | 
 | 	 * leaking memory (of old_b is freed) or to have multiple pointers to | 
 | 	 * the same block (if new_b is a copy for e.g. snoop). */ | 
 | 	warn_on(old_b->next); | 
 | } | 
 |  | 
 | void block_reset_metadata(struct block *b) | 
 | { | 
 | 	b->flag &= ~BLOCK_META_FLAGS; | 
 | 	b->tx_csum_offset = 0; | 
 | 	b->mss = 0; | 
 | 	b->network_offset = 0; | 
 | 	b->transport_offset = 0; | 
 | 	b->free = NULL; | 
 | } | 
 |  | 
 | /* Adds delta (which may be negative) to the block metadata offsets that are | 
 |  * relative to b->rp. */ | 
 | void block_add_to_offsets(struct block *b, int delta) | 
 | { | 
 | 	/* Note we do not add to tx_csum_offset.  That is relative to | 
 | 	 * transport_offset */ | 
 | 	b->network_offset += delta; | 
 | 	b->transport_offset += delta; | 
 | } | 
 |  | 
 | /* Transfers extra data from old to new.  This is not a copy nor a | 
 |  * qclone/refcount increase on the extra data blobs.  The old block loses the | 
 |  * data.  This changes BLEN for both, but not BHLEN.  'new' may have preexisting | 
 |  * ebds. */ | 
 | void block_transfer_extras(struct block *new, struct block *old) | 
 | { | 
 | 	struct extra_bdata *ebd; | 
 |  | 
 | 	for (int i = 0; i < old->nr_extra_bufs; i++) { | 
 | 		ebd = &old->extra_data[i]; | 
 | 		if (!ebd->base || !ebd->len) | 
 | 			continue; | 
 | 		block_append_extra(new, ebd->base, ebd->off, ebd->len, | 
 | 				   MEM_WAIT); | 
 | 	} | 
 |  | 
 | 	old->extra_len = 0; | 
 | 	old->nr_extra_bufs = 0; | 
 | 	kfree(old->extra_data); | 
 | 	old->extra_data = NULL; | 
 | } | 
 |  | 
 | /* Like block_transfer_extras(), but new may not have preexisting ebds. */ | 
 | void block_replace_extras(struct block *new, struct block *old) | 
 | { | 
 | 	assert(!new->extra_data); | 
 | 	new->extra_len = old->extra_len; | 
 | 	new->nr_extra_bufs = old->nr_extra_bufs; | 
 | 	new->extra_data = old->extra_data; | 
 | 	old->extra_len = 0; | 
 | 	old->nr_extra_bufs = 0; | 
 | 	old->extra_data = NULL; | 
 | } | 
 |  | 
 | /* Given a block, return a block with identical content but as if you allocated | 
 |  * it freshly with 'size', meaning with size bytes in the header/main body, some | 
 |  * of which contain the block's main body data in the new block.  Note all | 
 |  * blocks have an extra Hdrspc bytes to the left that is not counted. | 
 |  * | 
 |  * One thing to consider is a block that has 'moved to the right' in its main | 
 |  * body.  i.e. it used to have data, such as TCP/IP headers, but we've since | 
 |  * incremented b->rp.  We're near the end of the buffer and lim - wp is small. | 
 |  * This will give us a new block with the existing contents at the new 'default' | 
 |  * rp.  The old data to the left of rp will be gone. | 
 |  * | 
 |  * b may be in a blist.  We'll deal with its next pointer.  If b is in the | 
 |  * middle of a blist or a qio bfirst or blast, then the caller needs to deal | 
 |  * with pointers to it. */ | 
 | struct block *block_realloc(struct block *b, size_t size) | 
 | { | 
 | 	struct block *new; | 
 | 	size_t amt; | 
 |  | 
 | 	/* This means there is enough space for the old block data and the rest | 
 | 	 * of 'size'. */ | 
 | 	if (b->lim - b->wp + BHLEN(b) >= size) | 
 | 		return b; | 
 | 	size = MAX(size, BHLEN(b)); | 
 | 	new = block_alloc(size, MEM_WAIT); | 
 | 	amt = block_copy_to_body(new, b->rp, BHLEN(b)); | 
 | 	assert(amt == BHLEN(b)); | 
 | 	new->next = b->next; | 
 | 	b->next = NULL; | 
 | 	block_copy_metadata(new, b); | 
 | 	block_replace_extras(new, b); | 
 | 	freeb(b); | 
 | 	return new; | 
 | } | 
 |  | 
 | size_t block_copy_to_body(struct block *to, void *from, size_t copy_amt) | 
 | { | 
 | 	copy_amt = MIN(to->lim - to->wp, copy_amt); | 
 | 	memcpy(to->wp, from, copy_amt); | 
 | 	to->wp += copy_amt; | 
 | 	return copy_amt; | 
 | } | 
 |  | 
 | void free_block_extra(struct block *b) | 
 | { | 
 | 	struct extra_bdata *ebd; | 
 |  | 
 | 	/* assuming our release method is kfree, which will change when we | 
 | 	 * support user buffers */ | 
 | 	for (int i = 0; i < b->nr_extra_bufs; i++) { | 
 | 		ebd = &b->extra_data[i]; | 
 | 		if (ebd->base) | 
 | 			kfree((void*)ebd->base); | 
 | 	} | 
 | 	b->extra_len = 0; | 
 | 	b->nr_extra_bufs = 0; | 
 | 	kfree(b->extra_data);	/* harmless if it is 0 */ | 
 | 	b->extra_data = 0; /* in case the block is reused by a free override */ | 
 | } | 
 |  | 
 | /* Frees a block, returning its size (len, not alloc) */ | 
 | size_t freeb(struct block *b) | 
 | { | 
 | 	void *dead = (void *)Bdead; | 
 | 	size_t ret; | 
 |  | 
 | 	if (b == NULL) | 
 | 		return 0; | 
 | 	ret = BLEN(b); | 
 | 	free_block_extra(b); | 
 | 	/* | 
 | 	 * drivers which perform non cache coherent DMA manage their own buffer | 
 | 	 * pool of uncached buffers and provide their own free routine. | 
 | 	 */ | 
 | 	if (b->free) { | 
 | 		b->free(b); | 
 | 		return ret; | 
 | 	} | 
 | 	warn_on(b->next); | 
 | 	/* poison the block in case someone is still holding onto it */ | 
 | 	b->next = dead; | 
 | 	b->rp = dead; | 
 | 	b->wp = dead; | 
 | 	b->lim = dead; | 
 | 	b->base = dead; | 
 | 	kfree(b); | 
 | 	return ret; | 
 | } | 
 |  | 
 | /* Free a list of blocks, returning their total size. */ | 
 | size_t freeblist(struct block *b) | 
 | { | 
 | 	struct block *next; | 
 | 	size_t ret = 0; | 
 |  | 
 | 	for (; b != 0; b = next) { | 
 | 		next = b->next; | 
 | 		b->next = 0; | 
 | 		ret += freeb(b); | 
 | 	} | 
 | 	return ret; | 
 | } | 
 |  | 
 | void checkb(struct block *b, char *msg) | 
 | { | 
 | 	void *dead = (void *)Bdead; | 
 | 	struct extra_bdata *ebd; | 
 | 	size_t extra_len = 0; | 
 |  | 
 | 	if (b == dead) | 
 | 		panic("checkb b %s 0x%lx", msg, b); | 
 | 	if (b->base == dead || b->lim == dead || b->next == dead | 
 | 		|| b->rp == dead || b->wp == dead) { | 
 | 		printd("checkb: base 0x%8.8lx lim 0x%8.8lx next 0x%8.8lx\n", | 
 | 			   b->base, b->lim, b->next); | 
 | 		printd("checkb: rp 0x%8.8lx wp 0x%8.8lx\n", b->rp, b->wp); | 
 | 		panic("checkb dead: %s\n", msg); | 
 | 	} | 
 |  | 
 | 	if (b->base > b->lim) | 
 | 		panic("checkb 0 %s 0x%lx 0x%lx", msg, b->base, b->lim); | 
 | 	if (b->rp < b->base) | 
 | 		panic("checkb 1 %s 0x%lx 0x%lx", msg, b->base, b->rp); | 
 | 	if (b->wp < b->base) | 
 | 		panic("checkb 2 %s 0x%lx 0x%lx", msg, b->base, b->wp); | 
 | 	if (b->rp > b->lim) | 
 | 		panic("checkb 3 %s 0x%lx 0x%lx", msg, b->rp, b->lim); | 
 | 	if (b->wp > b->lim) | 
 | 		panic("checkb 4 %s 0x%lx 0x%lx", msg, b->wp, b->lim); | 
 | 	if (b->nr_extra_bufs && !b->extra_data) | 
 | 		panic("checkb 5 %s missing extra_data", msg); | 
 |  | 
 | 	for (int i = 0; i < b->nr_extra_bufs; i++) { | 
 | 		ebd = &b->extra_data[i]; | 
 | 		if (!ebd->base && (ebd->off || ebd->len)) | 
 | 			panic("checkb %s: ebd %d has no base, but has off %d and len %d", | 
 | 			      msg, i, ebd->off, ebd->len); | 
 | 		if (ebd->base) { | 
 | 			if (!kmalloc_refcnt((void*)ebd->base)) | 
 | 				panic("checkb %s: buf %d, base %p has no refcnt!\n", | 
 | 				      msg, i, ebd->base); | 
 | 			extra_len += ebd->len; | 
 | 		} | 
 | 	} | 
 | 	if (extra_len != b->extra_len) | 
 | 		panic("checkb %s: block extra_len %d differs from sum of ebd len %d", | 
 | 		      msg, b->extra_len, extra_len); | 
 | } | 
 |  | 
 | void printblock(struct block *b) | 
 | { | 
 | 	unsigned char *c; | 
 | 	unsigned int off, elen; | 
 | 	struct extra_bdata *e; | 
 |  | 
 | 	if (b == NULL) { | 
 | 		printk("block is null\n"); | 
 | 		return; | 
 | 	} | 
 |  | 
 | 	print_lock(); | 
 | 	printk("block of BLEN = %d, with %d header and %d data in %d extras\n", | 
 | 	       BLEN(b), BHLEN(b), b->extra_len, b->nr_extra_bufs); | 
 |  | 
 | 	printk("header:\n"); | 
 | 	printk("%2x:\t", 0); | 
 | 	off = 0; | 
 | 	for (c = b->rp; c < b->wp; c++) { | 
 | 		printk("  %02x", *c & 0xff); | 
 | 		off++; | 
 | 		if (off % 8 == 0) { | 
 | 			printk("\n"); | 
 | 			printk("%2x:\t", off); | 
 | 		} | 
 | 	} | 
 | 	printk("\n"); | 
 | 	elen = b->extra_len; | 
 | 	for (int i = 0; (i < b->nr_extra_bufs) && elen; i++) { | 
 | 		e = &b->extra_data[i]; | 
 | 		if (e->len == 0) | 
 | 			continue; | 
 | 		elen -= e->len; | 
 | 		printk("data %d:\n", i); | 
 | 		printk("%2x:\t", 0); | 
 | 		for (off = 0; off < e->len; off++) { | 
 | 			c = (unsigned char *)e->base + e->off + off; | 
 | 			printk("  %02x", *c & 0xff); | 
 | 			if ((off + 1) % 8 == 0 && off +1 < e->len) { | 
 | 				printk("\n"); | 
 | 				printk("%2x:\t", off + 1); | 
 | 			} | 
 | 		} | 
 | 	} | 
 | 	printk("\n"); | 
 | 	print_unlock(); | 
 | } |