/**
* @file cpu_buffer.c
*
* @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
* @author Barry Kasindorf <barry.kasindorf@amd.com>
* @author Robert Richter <robert.richter@amd.com>
*
* Each CPU has a local buffer that stores PC value/event
* pairs. We also log context switches when we notice them.
* Eventually each CPU's buffer is processed into the global
* event buffer by sync_buffer().
*
* We use a local buffer for two reasons: an NMI or similar
* interrupt cannot synchronise, and high sampling rates
* would lead to catastrophic global synchronisation if
* a global buffer was used.
*/
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"
#define OP_BUFFER_FLAGS 0
/* we allocate an array of these and set the pointer in pcpui */
struct oprofile_cpu_buffer *op_cpu_buffer;
/* this one queue is used by #K to get all events. */
static struct queue *opq;
/* this is run from core 0 for all cpu buffers. */
static void wq_sync_buffer(void);
unsigned long oprofile_cpu_buffer_size = 65536;
unsigned long oprofile_backtrace_depth = 16;
#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;
/*
* Resets the cpu buffer to a sane state.
*
* Reset these to invalid values; the next sample collected will
* populate the buffer with proper values to initialize it.
*/
static inline void op_cpu_buffer_reset(int cpu)
{
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[cpu];
cpu_buf->last_is_kernel = -1;
cpu_buf->last_proc = NULL;
//print_func_exit();
}
/* returns the remaining free size of data in the entry */
static inline
int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
{
//print_func_entry();
assert(entry->size >= 0);
if (!entry->size) {
//print_func_exit();
return 0;
}
*entry->data = val;
entry->size--;
entry->data++;
//print_func_exit();
return entry->size;
}
/* returns the size of data in the entry */
static inline int op_cpu_buffer_get_size(struct op_entry *entry)
{
//print_func_entry();
//print_func_exit();
return entry->size;
}
/* returns 0 if empty or the size of data including the current value */
static inline
int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
{
//print_func_entry();
int size = entry->size;
if (!size) {
//print_func_exit();
return 0;
}
*val = *entry->data;
entry->size--;
entry->data++;
//print_func_exit();
return size;
}
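/*
* Typical use of the entry helpers above (a sketch, not a spec): a producer
* sizes an entry with op_cpu_buffer_write_reserve() below, then calls
* op_cpu_buffer_add_data() once per word until it returns 0 (entry full);
* a consumer walks the same words back out with op_cpu_buffer_get_data(),
* which returns 0 once the entry is drained.
*/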
unsigned long oprofile_get_cpu_buffer_size(void)
{
//print_func_entry();
//print_func_exit();
return oprofile_cpu_buffer_size;
}
void oprofile_cpu_buffer_inc_smpl_lost(void)
{
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
cpu_buf->sample_lost_overflow++;
//print_func_exit();
}
void free_cpu_buffers(void)
{
//print_func_entry();
kfree(op_cpu_buffer);
/* clear the pointer so a later alloc_cpu_buffers() reallocates it */
op_cpu_buffer = NULL;
/* we can just leave the queue set up; it will then always return EOF */
//print_func_exit();
}
#define RB_EVENT_HDR_SIZE 4
int alloc_cpu_buffers(void)
{
//print_func_entry();
/* should probably start using waserror() here. The fail stuff just gets
* ugly.
*/
int i;
unsigned long buffer_size = oprofile_cpu_buffer_size;
unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
RB_EVENT_HDR_SIZE);
/* this can get called lots of times. Things might have been freed.
* So be careful.
*/
/* what limit? No idea. */
if (!opq)
opq = qopen(1024, 0, NULL, NULL);
if (!opq)
goto fail;
/* we *really* don't want to block. Losing data is better. */
qnoblock(opq, 1);
if (!op_cpu_buffer) {
op_cpu_buffer =
kzmalloc(sizeof(*op_cpu_buffer) * num_cpus, KMALLOC_WAIT);
if (!op_cpu_buffer)
goto fail;
for (i = 0; i < num_cpus; i++) {
struct oprofile_cpu_buffer *b = &op_cpu_buffer[i];
b->last_proc = NULL;
b->last_is_kernel = -1;
b->tracing = 0;
b->buffer_size = buffer_size;
b->sample_received = 0;
b->sample_lost_overflow = 0;
b->backtrace_aborted = 0;
b->sample_invalid_eip = 0;
b->cpu = i;
b->fullqueue = qopen(1024, Qmsg, NULL, NULL);
b->emptyqueue = qopen(1024, Qmsg, NULL, NULL);
spinlock_init_irqsave(&b->lock);
}
}
//print_func_exit();
return 0;
fail:
free_cpu_buffers();
//print_func_exit();
return -ENOMEM;
}
void start_cpu_work(void)
{
//print_func_entry();
int i;
work_enabled = 1;
/* task starts here.
schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
*/
//print_func_exit();
}
void end_cpu_work(void)
{
//print_func_entry();
work_enabled = 0;
//print_func_exit();
}
/* placeholder. Not used yet.
*/
void flush_cpu_work(void)
{
//print_func_entry();
int i;
struct oprofile_cpu_buffer *b = &op_cpu_buffer[core_id()];
//print_func_exit();
}
/* Not used since we're not doing per-cpu buffering yet.
*/
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
//print_func_entry();
//print_func_exit();
return NULL;
}
static struct block *op_cpu_buffer_write_reserve(struct oprofile_cpu_buffer *cpu_buf,
struct op_entry *entry, int size)
{
//print_func_entry();
struct block *b;
int totalsize = sizeof(struct op_sample) +
size * sizeof(entry->sample->data[0]);
b = cpu_buf->block;
/* we might have run out of room; we need totalsize bytes, not just size. */
if (!b || (b->lim - b->wp) < totalsize) {
if (b) {
qibwrite(opq, b);
}
/* For now. Later, we will grab a block off the
* emptyblock queue.
*/
cpu_buf->block = b = iallocb(oprofile_cpu_buffer_size);
if (!b) {
printk("%s: fail\n", __func__);
//print_func_exit();
return NULL;
}
}
entry->sample = (void *)b->wp;
entry->size = size;
entry->data = entry->sample->data;
b->wp += totalsize;
//print_func_exit();
return b;
}
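/*
* Layout note (descriptive only): each reservation packs a struct op_sample
* header plus 'size' unsigned long data words contiguously at b->wp, so a
* block on opq holds a sequence of variable-length samples. When the current
* block cannot hold another reservation, it is pushed to opq and a fresh one
* is allocated.
*/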
static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
int is_kernel, struct proc *proc)
{
//print_func_entry();
struct block *b;
struct op_entry entry;
struct op_sample *sample;
unsigned long flags;
int size;
ERRSTACK(1);
flags = 0;
if (waserror()) {
poperror();
printk("%s: failed\n", __func__);
//print_func_exit();
return 1;
}
if (backtrace)
flags |= TRACE_BEGIN;
/* notice a switch from user->kernel or vice versa */
is_kernel = !!is_kernel;
if (cpu_buf->last_is_kernel != is_kernel) {
cpu_buf->last_is_kernel = is_kernel;
flags |= KERNEL_CTX_SWITCH;
if (is_kernel)
flags |= IS_KERNEL;
}
/* notice a proc switch */
if (cpu_buf->last_proc != proc) {
cpu_buf->last_proc = proc;
flags |= USER_CTX_SWITCH;
}
if (!flags) {
poperror();
/* nothing to do */
//print_func_exit();
return 0;
}
if (flags & USER_CTX_SWITCH)
size = 1;
else
size = 0;
b = op_cpu_buffer_write_reserve(cpu_buf, &entry, size);
if (!b) {
poperror();
return 1;
}
entry.sample->eip = ESCAPE_CODE;
entry.sample->event = flags;
if (size)
op_cpu_buffer_add_data(&entry, (unsigned long)proc);
poperror();
//print_func_exit();
return 0;
}
static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
unsigned long pc, unsigned long event)
{
//print_func_entry();
ERRSTACK(1);
struct op_entry entry;
struct op_sample *sample;
struct block *b;
if (waserror()) {
poperror();
printk("%s: failed\n", __func__);
//print_func_exit();
return 1;
}
b = op_cpu_buffer_write_reserve(cpu_buf, &entry, 0);
if (!b) {
poperror();
return 1;
}
sample = entry.sample;
sample->eip = pc;
sample->event = event;
poperror();
//print_func_exit();
return 0;
}
/*
* This must be safe from any context.
*
* is_kernel is needed because on some architectures you cannot
* tell if you are in kernel or user space simply by looking at
* pc. We tag this in the buffer by generating kernel enter/exit
* events whenever is_kernel changes
*/
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
unsigned long backtrace, int is_kernel, unsigned long event,
struct proc *proc)
{
//print_func_entry();
struct proc *tsk = proc ? proc : current;
cpu_buf->sample_received++;
if (pc == ESCAPE_CODE) {
cpu_buf->sample_invalid_eip++;
//print_func_exit();
return 0;
}
/* ah, so great. op_add* return 1 in event of failure.
* this function returns 0 in event of failure.
* what a cluster.
*/
spin_lock_irqsave(&cpu_buf->lock);
if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
goto fail;
if (op_add_sample(cpu_buf, pc, event))
goto fail;
spin_unlock_irqsave(&cpu_buf->lock);
//print_func_exit();
return 1;
fail:
spin_unlock_irqsave(&cpu_buf->lock);
cpu_buf->sample_lost_overflow++;
//print_func_exit();
return 0;
}
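/*
* For reference, a log_sample() that notices a user->kernel switch emits
* records like the following into the current block (a sketch; values are
* illustrative):
*
*	{ .eip = ESCAPE_CODE, .event = KERNEL_CTX_SWITCH | IS_KERNEL }
*	{ .eip = pc,          .event = event }
*
* A proc switch additionally sets USER_CTX_SWITCH and appends the new
* struct proc pointer as a single data word on the escape record.
*/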
static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
//print_func_entry();
cpu_buf->tracing = 1;
//print_func_exit();
}
static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
//print_func_entry();
cpu_buf->tracing = 0;
//print_func_exit();
}
void oprofile_cpubuf_flushone(int core, int newbuf)
{
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf;
cpu_buf = &op_cpu_buffer[core];
spin_lock_irqsave(&cpu_buf->lock);
if (cpu_buf->block) {
printk("Core %d has data\n", core);
qibwrite(opq, cpu_buf->block);
printk("After qibwrite in %s, opq len %d\n", __func__, qlen(opq));
}
if (newbuf)
cpu_buf->block = iallocb(oprofile_cpu_buffer_size);
else
cpu_buf->block = NULL;
spin_unlock_irqsave(&cpu_buf->lock);
//print_func_exit();
}
void oprofile_cpubuf_flushall(int alloc)
{
//print_func_entry();
int core;
for(core = 0; core < num_cpus; core++) {
oprofile_cpubuf_flushone(core, alloc);
}
//print_func_exit();
}
void oprofile_control_trace(int onoff)
{
//print_func_entry();
int core;
struct oprofile_cpu_buffer *cpu_buf;
for(core = 0; core < num_cpus; core++) {
cpu_buf = &op_cpu_buffer[core];
cpu_buf->tracing = onoff;
if (onoff) {
printk("Enable tracing on %d\n", core);
continue;
}
/* halting. Force out all buffers. */
oprofile_cpubuf_flushone(core, 0);
}
//print_func_exit();
}
static inline void
__oprofile_add_ext_sample(unsigned long pc,
void /*struct pt_regs */ *const regs,
unsigned long event, int is_kernel, struct proc *proc)
{
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
unsigned long backtrace = oprofile_backtrace_depth;
/*
* if log_sample() fails we can't backtrace since we lost the
* source of this event
*/
if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, proc)) {
/* failed */
//print_func_exit();
return;
}
if (!backtrace) {
//print_func_exit();
return;
}
#if 0
oprofile_begin_trace(cpu_buf);
oprofile_ops.backtrace(regs, backtrace);
oprofile_end_trace(cpu_buf);
#endif
//print_func_exit();
}
void oprofile_add_ext_hw_sample(unsigned long pc,
void /*struct pt_regs */ *const regs,
unsigned long event, int is_kernel,
struct proc *proc)
{
//print_func_entry();
__oprofile_add_ext_sample(pc, regs, event, is_kernel, proc);
//print_func_exit();
}
void oprofile_add_ext_sample(unsigned long pc,
void /*struct pt_regs */ *const regs,
unsigned long event, int is_kernel)
{
//print_func_entry();
__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
//print_func_exit();
}
void oprofile_add_sample(void /*struct pt_regs */ *const regs,
unsigned long event)
{
//print_func_entry();
int is_kernel;
unsigned long pc;
if (regs) {
is_kernel = 0; // FIXME!user_mode(regs);
pc = 0; // FIXME profile_pc(regs);
} else {
is_kernel = 0; /* This value will not be used */
pc = ESCAPE_CODE; /* as this causes an early return. */
}
__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
//print_func_exit();
}
/*
* Add samples with data to the ring buffer.
*
* Use oprofile_add_data(&entry, val) to add data and
* oprofile_write_commit(&entry) to commit the sample.
*/
void
oprofile_write_reserve(struct op_entry *entry,
void /*struct pt_regs */ *const regs,
unsigned long pc, int code, int size)
{
//print_func_entry();
ERRSTACK(1);
struct op_sample *sample;
struct block *b;
int is_kernel = 0; // FIXME!user_mode(regs);
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
if (waserror()) {
printk("%s: failed\n", __func__);
poperror();
goto fail;
}
cpu_buf->sample_received++;
/* no backtraces for samples with data */
if (op_add_code(cpu_buf, 0, is_kernel, current)) {
poperror();
goto fail;
}
b = op_cpu_buffer_write_reserve(cpu_buf, entry, size + 2);
if (!b) {
poperror();
goto fail;
}
entry->event = (void *)b;	/* non-NULL marks the entry usable by oprofile_add_data() */
sample = entry->sample;
sample->eip = ESCAPE_CODE;
sample->event = 0;	/* no flags */
op_cpu_buffer_add_data(entry, code);
op_cpu_buffer_add_data(entry, pc);
poperror();
//print_func_exit();
return;
fail:
entry->event = NULL;
cpu_buf->sample_lost_overflow++;
//print_func_exit();
}
int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
//print_func_entry();
if (!entry->event) {
//print_func_exit();
return 0;
}
//print_func_exit();
return op_cpu_buffer_add_data(entry, val);
}
int oprofile_add_data64(struct op_entry *entry, uint64_t val)
{
//print_func_entry();
if (!entry->event) {
//print_func_exit();
return 0;
}
if (op_cpu_buffer_get_size(entry) < 2)
/*
* the function returns 0 to indicate a too small
* buffer, even if there is some space left
*/
{
//print_func_exit();
return 0;
}
if (!op_cpu_buffer_add_data(entry, (uint32_t) val)) {
//print_func_exit();
return 0;
}
//print_func_exit();
return op_cpu_buffer_add_data(entry, (uint32_t) (val >> 32));
}
int oprofile_write_commit(struct op_entry *entry)
{
//print_func_entry();
/* not much to do at present. In future, we might write the Block
* to opq.
*/
//print_func_exit();
return 0;
}
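/*
* A minimal usage sketch of the reserve/add/commit API described above
* oprofile_write_reserve(); illustrative only, with caller-supplied
* regs, pc, code, and data words:
*/
#if 0
struct op_entry entry;
oprofile_write_reserve(&entry, regs, pc, code, 2 /* two data words */);
if (entry.event) {
oprofile_add_data(&entry, data0);
oprofile_add_data(&entry, data1);
oprofile_write_commit(&entry);
}
#endif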
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
//print_func_exit();
}
void oprofile_add_trace(unsigned long pc)
{
if (! op_cpu_buffer)
return;
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
if (!cpu_buf->tracing) {
//print_func_exit();
return;
}
/*
* broken frame can give an eip with the same value as an
* escape code, abort the trace if we get it
*/
if (pc == ESCAPE_CODE)
goto fail;
if (op_add_sample(cpu_buf, pc, nsec()&~0xf))
goto fail;
//print_func_exit();
return;
fail:
printk("%s: fail. Turning of tracing on cpu %d\n", core_id());
cpu_buf->tracing = 0;
cpu_buf->backtrace_aborted++;
//print_func_exit();
return;
}
/* Format for samples:
* first word:
* high 8 bits are 0xee, which is an invalid address on amd64.
* next 8 bits are the protocol version
* next 16 bits are unused, MBZ. Later, we can make it a packet type.
* next 16 bits are the core id
* next 8 bits are unused
* next 8 bits are the # of PCs following. This should be at least 1, for one EIP.
*
* second word is the time in ns.
*
* Third and following words are PCs; there must be at least one of them.
*/
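/*
* A sketch of how a consumer could unpack the descriptor word above
* (variable names are ours, not part of this file):
*/
#if 0
uint32_t version = (descriptor >> 48) & 0xff;	/* 0x01 today */
uint32_t coreid = (descriptor >> 16) & 0xffff;
uint32_t nr_pcs = descriptor & 0xff;	/* at least 1, for one EIP */
#endif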
void oprofile_add_backtrace(uintptr_t pc, uintptr_t fp)
{
/* version 1. */
uint64_t descriptor = 0xee01ULL<<48;
if (! op_cpu_buffer)
return;
//print_func_entry();
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
if (!cpu_buf->tracing) {
//print_func_exit();
return;
}
struct op_entry entry;
struct op_sample *sample;
struct block *b;
uint64_t event = nsec();
uintptr_t bt_pcs[oprofile_backtrace_depth];
int nr_pcs;
nr_pcs = backtrace_list(pc, fp, bt_pcs, oprofile_backtrace_depth);
/* write_reserve always assumes passed-in-size + 2.
* backtrace_depth should always be > 0.
*/
b = op_cpu_buffer_write_reserve(cpu_buf, &entry, nr_pcs);
if (! b)
return;
/* we are changing the sample format, but not the struct
* member names yet. Later, assuming this works out.
*/
descriptor |= (core_id() << 16) | nr_pcs;
sample = entry.sample;
sample->eip = descriptor;
sample->event = event;
memcpy(sample->data, bt_pcs, sizeof(uintptr_t) * nr_pcs);
//print_func_exit();
return;
fail:
printk("%s: fail. Turning of tracing on cpu %d\n", core_id());
cpu_buf->tracing = 0;
cpu_buf->backtrace_aborted++;
//print_func_exit();
return;
}
void oprofile_add_userpc(uintptr_t pc)
{
struct oprofile_cpu_buffer *cpu_buf;
uint32_t pcoreid = core_id();
struct op_entry entry;
struct block *b;
uint64_t descriptor = (0xee01ULL << 48) | (pcoreid << 16) | 1;
if (!op_cpu_buffer)
return;
cpu_buf = &op_cpu_buffer[pcoreid];
if (!cpu_buf->tracing)
return;
/* write_reserve always assumes passed-in-size + 2. need room for 1 PC. */
b = op_cpu_buffer_write_reserve(cpu_buf, &entry, 1);
if (!b)
return;
entry.sample->eip = descriptor;
entry.sample->event = nsec();
/* entry.sample->data == entry.data */
assert(entry.sample->data == entry.data);
*entry.sample->data = pc;
}
int
oproflen(void)
{
return qlen(opq);
}
/* return # bytes read, or 0 if profiling is off, or block if profiling on and no data.
*/
int
oprofread(void *va, int n)
{
int len = qlen(opq);
struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
if (len == 0) {
if (cpu_buf->tracing == 0)
return 0;
}
len = qread(opq, va, n);
return len;
}