| /* pfmlib_intel_x86.c : common code for Intel X86 processors |
| * |
| * Copyright (c) 2009 Google, Inc |
| * Contributed by Stephane Eranian <eranian@gmail.com> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do so, |
| * subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
| * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A |
| * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
| * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE |
| * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * This file implements the common code for all Intel X86 processors. |
| */ |
| #include <sys/types.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdarg.h> |
| |
| /* private headers */ |
| #include "pfmlib_priv.h" |
| #include "pfmlib_intel_x86_priv.h" |
| |
| const pfmlib_attr_desc_t intel_x86_mods[]={ |
| PFM_ATTR_B("k", "monitor at priv level 0"), /* monitor priv level 0 */ |
| PFM_ATTR_B("u", "monitor at priv level 1, 2, 3"), /* monitor priv level 1, 2, 3 */ |
| PFM_ATTR_B("e", "edge level (may require counter-mask >= 1)"), /* edge */ |
| PFM_ATTR_B("i", "invert"), /* invert */ |
| PFM_ATTR_I("c", "counter-mask in range [0-255]"), /* counter-mask */ |
| PFM_ATTR_B("t", "measure any thread"), /* monitor on both threads */ |
| PFM_ATTR_I("ldlat", "load latency threshold (cycles, [3-65535])"), /* load latency threshold */ |
| PFM_ATTR_B("intx", "monitor only inside transactional memory region"), |
| PFM_ATTR_B("intxcp", "do not count occurrences inside aborted transactional memory region"), |
| PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ |
| }; |
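|
| /*
| * For illustration, these modifiers show up as colon-separated attributes
| * in the event strings parsed by the library. A hypothetical sketch (the
| * event and umask names are placeholders, not entries from any table):
| *
| * "SOME_EVENT:SOME_UMASK:k=1:u=0:c=2" kernel-only, counter-mask 2
| * "SOME_LOAD_EVENT:LAT_ABOVE_THR:ldlat=128" latency threshold 128 cycles
| */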
| |
| pfm_intel_x86_config_t pfm_intel_x86_cfg; |
| |
| /* |
| * .byte 0x53 == push %ebx, .byte 0x5b == pop %ebx; the opcodes are
| * identical in 32-bit and 64-bit mode.
| * Some gcc versions (e.g., 4.1.2 on Core2) reject an explicit push/pop
| * of %ebx in 64-bit mode. Emitting the opcodes directly avoids this problem.
| */ |
| static inline void |
| cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d) |
| { |
| __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b" |
| : "=a" (*a), |
| "=S" (*b), |
| "=c" (*c), |
| "=d" (*d) |
| : "a" (op)); |
| } |
| |
| static void |
| pfm_intel_x86_display_reg(void *this, pfmlib_event_desc_t *e) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| pfm_intel_x86_reg_t reg; |
| int i; |
| |
| reg.val = e->codes[0]; |
| |
| /* |
| * handle generic counters |
| */ |
| __pfm_vbprintf("[0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d " |
| "en=%d int=%d inv=%d edge=%d cnt_mask=%d", |
| reg.val, |
| reg.sel_event_select, |
| reg.sel_unit_mask, |
| reg.sel_os, |
| reg.sel_usr, |
| reg.sel_en, |
| reg.sel_int, |
| reg.sel_inv, |
| reg.sel_edge, |
| reg.sel_cnt_mask); |
| |
| if (pe[e->event].modmsk & _INTEL_X86_ATTR_T) |
| __pfm_vbprintf(" any=%d", reg.sel_anythr); |
| |
| __pfm_vbprintf("]", e->fstr); |
| |
| for (i = 1 ; i < e->count; i++) |
| __pfm_vbprintf(" [0x%"PRIx64"]", e->codes[i]); |
| |
| __pfm_vbprintf(" %s\n", e->fstr); |
| |
| } |
| |
| /* |
| * number of HW modifiers |
| */ |
| static int |
| intel_x86_num_mods(void *this, int idx) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| unsigned int mask; |
| |
| mask = pe[idx].modmsk; |
| return pfmlib_popcnt(mask); |
| } |
| |
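| /*
| * An event's attribute namespace lists umasks first, then modifiers:
| * attr_idx in [0, numasks) refers to a umask, attr_idx >= numasks to a
| * modifier. This helper maps such a global attribute index to the bit
| * position of the corresponding modifier in the event's modmsk.
| */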
| int |
| intel_x86_attr2mod(void *this, int pidx, int attr_idx) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| size_t x; |
| int n, numasks; |
| |
| numasks = intel_x86_num_umasks(this, pidx); |
| n = attr_idx - numasks; |
| |
| pfmlib_for_each_bit(x, pe[pidx].modmsk) { |
| if (n == 0) |
| break; |
| n--; |
| } |
| return x; |
| } |
| |
| /* |
| * detect processor family/model/stepping via cpuid(), as documented in
| * the Intel Processor Identification application note:
| * http://www.intel.com/Assets/PDF/appnote/241618.pdf
| */ |
| int |
| pfm_intel_x86_detect(void) |
| { |
| unsigned int a, b, c, d; |
| char buffer[64]; |
| |
| if (pfm_intel_x86_cfg.family) |
| return PFM_SUCCESS; |
| |
| cpuid(0, &a, &b, &c, &d); |
| strncpy(&buffer[0], (char *)(&b), 4); |
| strncpy(&buffer[4], (char *)(&d), 4); |
| strncpy(&buffer[8], (char *)(&c), 4); |
| buffer[12] = '\0'; |
| |
| /* must be Intel */ |
| if (strcmp(buffer, "GenuineIntel")) |
| return PFM_ERR_NOTSUPP; |
| |
| cpuid(1, &a, &b, &c, &d); |
| |
| pfm_intel_x86_cfg.family = (a >> 8) & 0xf; /* bits 8-11 */
| pfm_intel_x86_cfg.model = (a >> 4) & 0xf; /* bits 4-7 */
| pfm_intel_x86_cfg.stepping = a & 0xf; /* bits 0-3 */
| |
| /* extended family */ |
| if (pfm_intel_x86_cfg.family == 0xf) |
| pfm_intel_x86_cfg.family += (a >> 20) & 0xff; |
| |
| /* extended model */ |
| if (pfm_intel_x86_cfg.family >= 0x6) |
| pfm_intel_x86_cfg.model += ((a >> 16) & 0xf) << 4; |
| |
| return PFM_SUCCESS; |
| } |
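|
| /*
| * Worked example (hypothetical leaf-1 value): with eax = 0x000106e5,
| * family = (a >> 8) & 0xf = 0x6, model = (a >> 4) & 0xf = 0xe,
| * stepping = a & 0xf = 0x5; family >= 0x6, so the extended model bits
| * are folded in: model = (((a >> 16) & 0xf) << 4) + 0xe = 0x1e.
| */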
| |
| int pfm_intel_x86_model_detect(void *this) |
| { |
| pfmlib_pmu_t *pmu = this; |
| const int *p; |
| int ret; |
| |
| ret = pfm_intel_x86_detect(); |
| if (ret != PFM_SUCCESS) |
| return ret; |
| |
| if (pfm_intel_x86_cfg.family != pmu->cpu_family) |
| return PFM_ERR_NOTSUPP; |
| |
| for (p = pmu->cpu_models; *p; p++) { |
| if (*p == pfm_intel_x86_cfg.model) |
| return PFM_SUCCESS; |
| } |
| return PFM_ERR_NOTSUPP; |
| } |
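|
| /*
| * The cpu_models list walked above is expected to be zero-terminated,
| * e.g. (hypothetical): static const int models[] = { 30, 31, 46, 0 };
| */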
| |
| int |
| pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e, |
| unsigned int msk, |
| uint64_t *umask, |
| unsigned int max_grpid) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| const intel_x86_entry_t *ent; |
| unsigned int i; |
| int j, k, added, skip; |
| int idx; |
| |
| k = e->nattrs; |
| ent = pe+e->event; |
| |
| for(i=0; msk; msk >>=1, i++) { |
| |
| if (!(msk & 0x1)) |
| continue; |
| |
| added = skip = 0; |
| /*
| * must scan the event's full list of possible attributes
| * (e->pattrs), not just the ones the caller specified
| */
| for (j = 0; j < e->npattrs; j++) { |
| if (e->pattrs[j].ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| |
| if (e->pattrs[j].type != PFM_ATTR_UMASK) |
| continue; |
| |
| idx = e->pattrs[j].idx; |
| |
| if (ent->umasks[idx].grpid != i) |
| continue; |
| |
| if (max_grpid != INTEL_X86_MAX_GRPID && i > max_grpid) { |
| skip = 1; |
| continue; |
| } |
| |
| if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) { |
| skip = 1; |
| continue; |
| } |
| |
| /* umask is default for group */ |
| if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) { |
| DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n", |
| ent->umasks[idx].uname, |
| i, |
| j, |
| idx, |
| ent->umasks[idx].ucode); |
| /* |
| * default could be an alias, but |
| * ucode must reflect actual code |
| */ |
| *umask |= ent->umasks[idx].ucode >> 8; |
| |
| e->attrs[k].id = j; /* pattrs index */ |
| e->attrs[k].ival = 0; |
| k++; |
| |
| added++; |
| if (intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) |
| goto done; |
| |
| if (intel_x86_uflag(this, e->event, idx, INTEL_X86_EXCL_GRP_GT)) { |
| if (max_grpid != INTEL_X86_MAX_GRPID) { |
| DPRINT("two max_grpid, old=%d new=%d\n", max_grpid, ent->umasks[idx].grpid); |
| return PFM_ERR_UMASK; |
| } |
| max_grpid = ent->umasks[idx].grpid; |
| } |
| } |
| } |
| if (!added && !skip) { |
| DPRINT("no default found for event %s unit mask group %d (max_grpid=%d)\n", ent->name, i, max_grpid); |
| return PFM_ERR_UMASK; |
| } |
| } |
| DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask); |
| done: |
| e->nattrs = k; |
| return PFM_SUCCESS; |
| } |
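|
| /*
| * Illustration (hypothetical event layout, not from any table): for an
| * event with ngrp = 2, where group 0 holds umasks A (flagged INTEL_X86_DFL)
| * and B, and group 1 holds umask C, a caller specifying only C leaves
| * group 0 empty; msk then arrives as 0x1 and the loop above appends A,
| * so every umask group contributes to the final encoding.
| */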
| |
| static int |
| intel_x86_check_pebs(void *this, pfmlib_event_desc_t *e) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| pfm_event_attr_info_t *a; |
| int numasks = 0, pebs = 0; |
| int i; |
| |
| /*
| * check currently disabled: the intended condition,
| * !intel_x86_requesting_pebs(e), is commented out, so we
| * always succeed here
| */
| if (1)
| return PFM_SUCCESS;
| |
| /*
| * if the event has no umasks and supports PEBS, then we are okay
| */
| if (!pe[e->event].numasks |
| && intel_x86_eflag(this, e->event, INTEL_X86_PEBS)) |
| return PFM_SUCCESS; |
| |
| /*
| * if the event is flagged PEBS, it means at least one umask
| * supports PEBS, so we need to check them
| */
| for (i = 0; i < e->nattrs; i++) { |
| a = attr(e, i); |
| |
| if (a->ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| |
| if (a->type == PFM_ATTR_UMASK) { |
| /* count number of umasks */ |
| numasks++; |
| /* and those that support PEBS */ |
| if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_PEBS)) |
| pebs++; |
| } |
| } |
| /* |
| * pass if user requested only PEBS umasks |
| */ |
| return pebs != numasks ? PFM_ERR_FEATCOMB : PFM_SUCCESS; |
| } |
| |
| static int |
| intel_x86_check_max_grpid(void *this, pfmlib_event_desc_t *e, int max_grpid) |
| { |
| const intel_x86_entry_t *pe; |
| pfm_event_attr_info_t *a; |
| int i, grpid; |
| |
| DPRINT("check: max_grpid=%d\n", max_grpid); |
| pe = this_pe(this); |
| |
| for (i = 0; i < e->nattrs; i++) { |
| a = attr(e, i); |
| |
| if (a->ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| |
| if (a->type == PFM_ATTR_UMASK) { |
| grpid = pe[e->event].umasks[a->idx].grpid; |
| if (grpid > max_grpid) |
| return PFM_ERR_FEATCOMB; |
| } |
| } |
| return PFM_SUCCESS; |
| } |
| |
| static int |
| pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e) |
| |
| { |
| pfmlib_pmu_t *pmu = this; |
| pfm_event_attr_info_t *a; |
| const intel_x86_entry_t *pe; |
| pfm_intel_x86_reg_t reg; |
| unsigned int grpmsk, ugrpmsk = 0; |
| uint64_t umask1, umask2, ucode, last_ucode = ~0ULL; |
| unsigned int modhw = 0; |
| unsigned int plmmsk = 0; |
| int umodmsk = 0, modmsk_r = 0; |
| int k, ret, id; |
| unsigned int max_grpid = INTEL_X86_MAX_GRPID; |
| unsigned int last_grpid = INTEL_X86_MAX_GRPID; |
| unsigned int grpid; |
| int ldlat = 0, ldlat_um = 0; |
| int grpcounts[INTEL_X86_NUM_GRP]; |
| int ncombo[INTEL_X86_NUM_GRP]; |
| |
| memset(grpcounts, 0, sizeof(grpcounts)); |
| memset(ncombo, 0, sizeof(ncombo)); |
| |
| pe = this_pe(this); |
| |
| e->fstr[0] = '\0'; |
| |
| /* |
| * preset certain fields from event code |
| * including modifiers |
| */ |
| reg.val = pe[e->event].code; |
| |
| grpmsk = (1 << pe[e->event].ngrp)-1; |
| |
| /* take into account hardcoded umask */ |
| umask1 = (reg.val >> 8) & 0xff; |
| umask2 = 0; |
| |
| modmsk_r = pe[e->event].modmsk_req; |
| |
| for (k = 0; k < e->nattrs; k++) { |
| a = attr(e, k); |
| |
| if (a->ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| |
| if (a->type == PFM_ATTR_UMASK) { |
| grpid = pe[e->event].umasks[a->idx].grpid; |
| |
| /* |
| * certain event groups are meant to be |
| * exclusive, i.e., only unit masks of one group |
| * can be used |
| */ |
| if (last_grpid != INTEL_X86_MAX_GRPID && grpid != last_grpid |
| && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) { |
| DPRINT("exclusive unit mask group error\n"); |
| return PFM_ERR_FEATCOMB; |
| } |
| /* |
| * selecting certain umasks in a group may exclude any umasks |
| * from any groups with a higher index |
| * |
| * enforcement requires looking at the grpid of all the umasks |
| */ |
| if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_GT)) |
| max_grpid = grpid; |
| |
| /*
| * the upper layer has removed duplicates, so if we come here
| * more than once, it is for two distinct umasks
| *
| * NCOMBO = no combination of unit masks within the same
| * umask group
| */
| ++grpcounts[grpid]; |
| |
| /* mark that we have a umask with NCOMBO in this group */ |
| if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_NCOMBO)) |
| ncombo[grpid] = 1; |
| |
| if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_LDLAT)) |
| ldlat_um = 1; |
| /*
| * if more than one umask is selected in this group and one of them
| * is marked NCOMBO, then fail. Combining umasks within a group is
| * okay as long as none of them is tagged with NCOMBO
| */
| if (grpcounts[grpid] > 1 && ncombo[grpid]) { |
| DPRINT("umask %s does not support unit mask combination within group %d\n", pe[e->event].umasks[a->idx].uname, grpid); |
| return PFM_ERR_FEATCOMB; |
| } |
| |
| last_grpid = grpid; |
| ucode = pe[e->event].umasks[a->idx].ucode; |
| modhw |= pe[e->event].umasks[a->idx].modhw; |
| umask2 |= ucode >> 8; |
| ugrpmsk |= 1 << pe[e->event].umasks[a->idx].grpid; |
| |
| modmsk_r |= pe[e->event].umasks[a->idx].umodmsk_req; |
| |
| if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_CODE_OVERRIDE)) { |
| if (last_ucode != ~0ULL && (ucode & 0xff) != last_ucode) { |
| DPRINT("cannot override event with two different codes for %s\n", pe[e->event].name); |
| return PFM_ERR_FEATCOMB; |
| } |
| last_ucode = ucode & 0xff; |
| reg.sel_event_select = last_ucode; |
| } |
| } else if (a->type == PFM_ATTR_RAW_UMASK) { |
| |
| /* there can only be one RAW_UMASK per event */ |
| |
| /* sanity check */ |
| if (a->idx & ~0xff) { |
| DPRINT("raw umask is 8-bit wide\n"); |
| return PFM_ERR_ATTR; |
| } |
| /* override umask */ |
| umask2 = a->idx & 0xff; |
| ugrpmsk = grpmsk; |
| } else { |
| uint64_t ival = e->attrs[k].ival; |
| switch(a->idx) { |
| case INTEL_X86_ATTR_I: /* invert */ |
| if (modhw & _INTEL_X86_ATTR_I) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_inv = !!ival; |
| umodmsk |= _INTEL_X86_ATTR_I; |
| break; |
| case INTEL_X86_ATTR_E: /* edge */ |
| if (modhw & _INTEL_X86_ATTR_E) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_edge = !!ival; |
| umodmsk |= _INTEL_X86_ATTR_E; |
| break; |
| case INTEL_X86_ATTR_C: /* counter-mask */ |
| if (modhw & _INTEL_X86_ATTR_C) |
| return PFM_ERR_ATTR_SET; |
| if (ival > 255) |
| return PFM_ERR_ATTR_VAL; |
| reg.sel_cnt_mask = ival; |
| umodmsk |= _INTEL_X86_ATTR_C; |
| break; |
| case INTEL_X86_ATTR_U: /* USR */ |
| if (modhw & _INTEL_X86_ATTR_U) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_usr = !!ival; |
| plmmsk |= _INTEL_X86_ATTR_U; |
| umodmsk |= _INTEL_X86_ATTR_U; |
| break; |
| case INTEL_X86_ATTR_K: /* OS */ |
| if (modhw & _INTEL_X86_ATTR_K) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_os = !!ival; |
| plmmsk |= _INTEL_X86_ATTR_K; |
| umodmsk |= _INTEL_X86_ATTR_K; |
| break; |
| case INTEL_X86_ATTR_T: /* anythread (v3 and above) */ |
| if (modhw & _INTEL_X86_ATTR_T) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_anythr = !!ival; |
| umodmsk |= _INTEL_X86_ATTR_T; |
| break; |
| case INTEL_X86_ATTR_LDLAT: /* load latency */ |
| if (ival < 3 || ival > 65535) |
| return PFM_ERR_ATTR_VAL; |
| ldlat = ival; |
| break; |
| case INTEL_X86_ATTR_INTX: /* in_tx */ |
| if (modhw & _INTEL_X86_ATTR_INTX) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_intx = !!ival; |
| umodmsk |= _INTEL_X86_ATTR_INTX; |
| break; |
| case INTEL_X86_ATTR_INTXCP: /* in_tx_cp */ |
| if (modhw & _INTEL_X86_ATTR_INTXCP) |
| return PFM_ERR_ATTR_SET; |
| reg.sel_intxcp = !!ival; |
| umodmsk |= _INTEL_X86_ATTR_INTXCP; |
| break; |
| } |
| } |
| } |
| |
| /*
| * if no priv level mask was passed, fall back to the
| * default (dfl_plm)
| */
| if (!(plmmsk & (_INTEL_X86_ATTR_K|_INTEL_X86_ATTR_U))) { |
| if ((e->dfl_plm & PFM_PLM0) && (pmu->supported_plm & PFM_PLM0)) |
| reg.sel_os = 1; |
| if ((e->dfl_plm & PFM_PLM3) && (pmu->supported_plm & PFM_PLM3)) |
| reg.sel_usr = 1; |
| } |
| /*
| * check that there is at least one unit mask in each unit
| * mask group
| */
| if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) { |
| ugrpmsk ^= grpmsk; |
| ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask2, max_grpid); |
| if (ret != PFM_SUCCESS) |
| return ret; |
| } |
| |
| ret = intel_x86_check_pebs(this, e); |
| if (ret != PFM_SUCCESS) |
| return ret; |
| |
| /* |
| * check no umask violates the max_grpid constraint |
| */ |
| if (max_grpid != INTEL_X86_MAX_GRPID) { |
| ret = intel_x86_check_max_grpid(this, e, max_grpid); |
| if (ret != PFM_SUCCESS) { |
| DPRINT("event %s: umask from grp > %d\n", pe[e->event].name, max_grpid); |
| return ret; |
| } |
| } |
| |
| if (modmsk_r && (umodmsk ^ modmsk_r)) { |
| DPRINT("required modifiers missing: 0x%x\n", modmsk_r); |
| return PFM_ERR_ATTR; |
| } |
| /* |
| * reorder all the attributes so that fstr always comes out the same
| * regardless of the order in which the attributes were submitted.
| */ |
| evt_strcat(e->fstr, "%s", pe[e->event].name); |
| pfmlib_sort_attr(e); |
| for(k=0; k < e->nattrs; k++) { |
| a = attr(e, k); |
| if (a->ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| if (a->type == PFM_ATTR_UMASK) |
| evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname); |
| else if (a->type == PFM_ATTR_RAW_UMASK) |
| evt_strcat(e->fstr, ":0x%x", a->idx); |
| } |
| |
| if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) { |
| e->codes[1] = umask2; |
| e->count = 2; |
| umask2 = 0; |
| } else { |
| e->count = 1; |
| } |
| |
| if (ldlat && !ldlat_um) { |
| DPRINT("passed ldlat= but not using ldlat umask\n"); |
| return PFM_ERR_ATTR; |
| } |
| |
| /* |
| * force a default ldlat (will not appear in display_reg) |
| */ |
| if (ldlat_um && !ldlat) { |
| DPRINT("missing ldlat= for umask, forcing to default %d cycles\n", INTEL_X86_LDLAT_DEFAULT); |
| ldlat = INTEL_X86_LDLAT_DEFAULT; |
| } |
| |
| if (ldlat && ldlat_um) { |
| e->codes[1] = ldlat; |
| e->count = 2; |
| } |
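|
| /*
| * at this point, when e->count == 2, e->codes[1] carries either the
| * offcore response extra umask or the ldlat threshold; programming it
| * into hardware (e.g., a PEBS load-latency MSR) is the caller's concern
| */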
| |
| /* merge the hardcoded and user-selected umask bits, i.e., OR them into reg.val */
| reg.val |= (umask1 | umask2) << 8; |
| |
| reg.sel_en = 1; /* force enable bit to 1 */ |
| reg.sel_int = 1; /* force APIC int to 1 */ |
| |
| e->codes[0] = reg.val; |
| |
| DPRINT("sel_edge=%d cnt=%d\n", reg.sel_edge, reg.sel_cnt_mask); |
| /* |
| * on recent processors (except Atom), edge requires cmask >=1 |
| */ |
| if ((pmu->flags & INTEL_X86_PMU_FL_ECMASK) |
| && reg.sel_edge && !reg.sel_cnt_mask) { |
| DPRINT("edge requires cmask >= 1\n"); |
| return PFM_ERR_ATTR; |
| } |
| |
| /* |
| * decode ALL modifiers |
| */ |
| for (k = 0; k < e->npattrs; k++) { |
| if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU) |
| continue; |
| |
| if (e->pattrs[k].type == PFM_ATTR_UMASK) |
| continue; |
| |
| id = e->pattrs[k].idx; |
| switch(id) { |
| case INTEL_X86_ATTR_U: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_usr); |
| break; |
| case INTEL_X86_ATTR_K: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_os); |
| break; |
| case INTEL_X86_ATTR_E: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_edge); |
| break; |
| case INTEL_X86_ATTR_I: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_inv); |
| break; |
| case INTEL_X86_ATTR_C: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_cnt_mask); |
| break; |
| case INTEL_X86_ATTR_T: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_anythr); |
| break; |
| case INTEL_X86_ATTR_LDLAT: |
| evt_strcat(e->fstr, ":%s=%d", intel_x86_mods[id].name, ldlat); |
| break; |
| case INTEL_X86_ATTR_INTX: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intx); |
| break; |
| case INTEL_X86_ATTR_INTXCP: |
| evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intxcp); |
| break; |
| } |
| } |
| return PFM_SUCCESS; |
| } |
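|
| /*
| * For reference, e->codes[0] built above follows the architectural
| * IA32_PERFEVTSELx layout (Intel SDM): bits 0-7 event select, 8-15 unit
| * mask, 16 USR, 17 OS, 18 edge, 19 pin control, 20 APIC interrupt enable,
| * 21 any-thread, 22 enable, 23 invert, 24-31 counter mask.
| */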
| |
| int |
| pfm_intel_x86_get_encoding(void *this, pfmlib_event_desc_t *e) |
| { |
| int ret; |
| |
| ret = pfm_intel_x86_encode_gen(this, e); |
| if (ret != PFM_SUCCESS) |
| return ret; |
| |
| pfm_intel_x86_display_reg(this, e); |
| |
| return PFM_SUCCESS; |
| } |
| |
| int |
| pfm_intel_x86_get_event_first(void *this) |
| { |
| pfmlib_pmu_t *p = this; |
| |
| return p->pme_count ? 0 : -1; |
| } |
| |
| int |
| pfm_intel_x86_get_event_next(void *this, int idx) |
| { |
| pfmlib_pmu_t *p = this; |
| |
| if (idx >= (p->pme_count-1)) |
| return -1; |
| |
| return idx+1; |
| } |
| |
| int |
| pfm_intel_x86_event_is_valid(void *this, int pidx) |
| { |
| pfmlib_pmu_t *p = this; |
| return pidx >= 0 && pidx < p->pme_count; |
| } |
| |
| int |
| pfm_intel_x86_validate_table(void *this, FILE *fp) |
| { |
| pfmlib_pmu_t *pmu = this; |
| const intel_x86_entry_t *pe = this_pe(this); |
| int ndfl[INTEL_X86_NUM_GRP]; |
| int i, j, error = 0; |
| unsigned int u, v; |
| int npebs; |
| |
| if (!pmu->atdesc) { |
| fprintf(fp, "pmu: %s missing attr_desc\n", pmu->name); |
| error++; |
| } |
| |
| if (!pmu->supported_plm && pmu->type == PFM_PMU_TYPE_CORE) { |
| fprintf(fp, "pmu: %s supported_plm not set\n", pmu->name); |
| error++; |
| } |
| |
| for(i=0; i < pmu->pme_count; i++) { |
| |
| if (!pe[i].name) { |
| fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i, |
| i > 1 ? pe[i-1].name : "??"); |
| error++; |
| } |
| |
| if (!pe[i].desc) { |
| fprintf(fp, "pmu: %s event%d: %s :: no description\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (!pe[i].cntmsk) { |
| fprintf(fp, "pmu: %s event%d: %s :: cntmsk=0\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].numasks && pe[i].ngrp == 0) { |
| fprintf(fp, "pmu: %s event%d: %s :: ngrp cannot be zero\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].numasks && pe[i].umasks == NULL) { |
| fprintf(fp, "pmu: %s event%d: %s :: numasks but no umasks\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].numasks == 0 && pe[i].umasks) { |
| fprintf(fp, "pmu: %s event%d: %s :: numasks=0 but umasks defined\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].numasks == 0 && pe[i].ngrp) { |
| fprintf(fp, "pmu: %s event%d: %s :: ngrp must be zero\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].ngrp >= INTEL_X86_NUM_GRP) { |
| fprintf(fp, "pmu: %s event%d: %s :: ngrp too big (max=%d)\n", pmu->name, i, pe[i].name, INTEL_X86_NUM_GRP); |
| error++; |
| } |
| |
| for (j=i+1; j < (int)pmu->pme_count; j++) { |
| if (pe[i].code == pe[j].code && !(pe[j].equiv || pe[i].equiv) && pe[j].cntmsk == pe[i].cntmsk) { |
| fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code); |
| error++; |
| } |
| } |
| |
| for(j=0; j < INTEL_X86_NUM_GRP; j++) |
| ndfl[j] = 0; |
| |
| for(j=0, npebs = 0; j < (int)pe[i].numasks; j++) { |
| |
| if (!pe[i].umasks[j].uname) { |
| fprintf(fp, "pmu: %s event%d: %s umask%d :: no name\n", pmu->name, i, pe[i].name, j); |
| error++; |
| } |
| if (pe[i].umasks[j].modhw && (pe[i].umasks[j].modhw | pe[i].modmsk) != pe[i].modmsk) { |
| fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: modhw not subset of modmsk\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname); |
| error++; |
| } |
| |
| if (!pe[i].umasks[j].udesc) { |
| fprintf(fp, "pmu: %s event%d: umask%d: %s :: no description\n", pmu->name, i, j, pe[i].umasks[j].uname); |
| error++; |
| } |
| |
| if (pe[i].ngrp && pe[i].umasks[j].grpid >= pe[i].ngrp) { |
| fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: invalid grpid %d (must be < %d)\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname, pe[i].umasks[j].grpid, pe[i].ngrp); |
| error++; |
| } |
| if (pe[i].umasks[j].uflags & INTEL_X86_DFL) |
| ndfl[pe[i].umasks[j].grpid]++; |
| |
| if (pe[i].umasks[j].uflags & INTEL_X86_PEBS) |
| npebs++; |
| } |
| |
| if (npebs && !intel_x86_eflag(this, i, INTEL_X86_PEBS)) { |
| fprintf(fp, "pmu: %s event%d: %s, pebs umasks but event pebs flag not set\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (intel_x86_eflag(this, i, INTEL_X86_PEBS) && pe[i].numasks && npebs == 0) { |
| fprintf(fp, "pmu: %s event%d: %s, pebs event flag but not umask has pebs flag\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| /* if there is only one umask, it ought to be marked as the default */
| if (pe[i].numasks == 1 && !(pe[i].umasks[0].uflags & INTEL_X86_DFL)) { |
| fprintf(fp, "pmu: %s event%d: %s, only one umask but no default\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| if (pe[i].numasks) { |
| unsigned int *dfl_model = malloc(sizeof(*dfl_model) * pe[i].numasks); |
| if (!dfl_model) |
| goto skip_dfl; |
| for(u=0; u < pe[i].ngrp; u++) { |
| int l = 0, m; |
| for (v = 0; v < pe[i].numasks; v++) { |
| if (pe[i].umasks[v].grpid != u) |
| continue; |
| if (pe[i].umasks[v].uflags & INTEL_X86_DFL) { |
| for (m = 0; m < l; m++) { |
| if (dfl_model[m] == pe[i].umasks[v].umodel || dfl_model[m] == 0) { |
| fprintf(fp, "pmu: %s event%d: %s grpid %d has 2 default umasks\n", pmu->name, i, pe[i].name, u); |
| error++; |
| } |
| } |
| if (m == l) |
| dfl_model[l++] = pe[i].umasks[v].umodel; |
| } |
| } |
| } |
| free(dfl_model); |
| } |
| skip_dfl: |
| |
| if (pe[i].flags & INTEL_X86_NCOMBO) { |
| fprintf(fp, "pmu: %s event%d: %s :: NCOMBO is unit mask only flag\n", pmu->name, i, pe[i].name); |
| error++; |
| } |
| |
| for(u=0; u < pe[i].numasks; u++) { |
| |
| if (pe[i].umasks[u].uequiv) |
| continue; |
| |
| if (pe[i].umasks[u].uflags & INTEL_X86_NCOMBO) |
| continue; |
| |
| for(v=u+1; v < pe[i].numasks; v++) {
| if (pe[i].umasks[v].uequiv) |
| continue; |
| if (pe[i].umasks[v].uflags & INTEL_X86_NCOMBO) |
| continue; |
| if (pe[i].umasks[v].grpid != pe[i].umasks[u].grpid) |
| continue; |
| if ((pe[i].umasks[u].ucode & pe[i].umasks[v].ucode) && pe[i].umasks[u].umodel == pe[i].umasks[v].umodel) { |
| fprintf(fp, "pmu: %s event%d: %s :: umask %s and %s have overlapping code bits\n", pmu->name, i, pe[i].name, pe[i].umasks[u].uname, pe[i].umasks[v].uname); |
| error++; |
| } |
| } |
| } |
| } |
| return error ? PFM_ERR_INVAL : PFM_SUCCESS; |
| } |
| |
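| /*
| * Note on umask codes: in the event tables, ucode carries the unit mask
| * value in bits 8-15 (hence the >> 8 below); for umasks flagged
| * INTEL_X86_CODE_OVERRIDE, the low 8 bits hold a replacement event select
| * code (see pfm_intel_x86_encode_gen() above), so the code is reported
| * unshifted in that case.
| */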
| int |
| pfm_intel_x86_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| const pfmlib_attr_desc_t *atdesc = this_atdesc(this); |
| int numasks, idx; |
| |
| numasks = intel_x86_num_umasks(this, pidx); |
| if (attr_idx < numasks) { |
| idx = intel_x86_attr2umask(this, pidx, attr_idx); |
| info->name = pe[pidx].umasks[idx].uname; |
| info->desc = pe[pidx].umasks[idx].udesc; |
| info->equiv= pe[pidx].umasks[idx].uequiv; |
| |
| info->code = pe[pidx].umasks[idx].ucode; |
| if (!intel_x86_uflag(this, pidx, idx, INTEL_X86_CODE_OVERRIDE)) |
| info->code >>= 8; |
| |
| info->type = PFM_ATTR_UMASK; |
| info->is_dfl = intel_x86_uflag(this, pidx, idx, INTEL_X86_DFL); |
| info->is_precise = intel_x86_uflag(this, pidx, idx, INTEL_X86_PEBS); |
| } else { |
| idx = intel_x86_attr2mod(this, pidx, attr_idx); |
| info->name = atdesc[idx].name; |
| info->desc = atdesc[idx].desc; |
| info->type = atdesc[idx].type; |
| info->equiv= NULL; |
| info->code = idx; |
| info->is_dfl = 0; |
| info->is_precise = 0; |
| } |
| |
| info->ctrl = PFM_ATTR_CTRL_PMU; |
| info->idx = idx; /* namespace specific index */ |
| info->dfl_val64 = 0; |
| |
| return PFM_SUCCESS; |
| } |
| |
| int |
| pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info) |
| { |
| const intel_x86_entry_t *pe = this_pe(this); |
| pfmlib_pmu_t *pmu = this; |
| |
| info->name = pe[idx].name; |
| info->desc = pe[idx].desc; |
| info->code = pe[idx].code; |
| info->equiv = pe[idx].equiv; |
| info->idx = idx; /* private index */ |
| info->pmu = pmu->pmu; |
| /* |
| * no umask: event supports PEBS |
| * with umasks: at least one umask supports PEBS |
| */ |
| info->is_precise = intel_x86_eflag(this, idx, INTEL_X86_PEBS); |
| |
| info->nattrs = intel_x86_num_umasks(this, idx); |
| info->nattrs += intel_x86_num_mods(this, idx); |
| |
| return PFM_SUCCESS; |
| } |
| |
| int |
| pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e) |
| { |
| pfm_event_attr_info_t *a; |
| int i, npebs = 0, numasks = 0; |
| |
| /* first check at the event level */ |
| if (intel_x86_eflag(e->pmu, e->event, INTEL_X86_PEBS)) |
| return PFM_SUCCESS; |
| |
| /* |
| * next check the umasks |
| * |
| * we do not assume we are called after
| * pfm_intel_x86_get_encoding(), therefore
| * we check the unit masks again.
| * They must all be PEBS-capable. |
| */ |
| for(i=0; i < e->nattrs; i++) { |
| |
| a = attr(e, i); |
| |
| if (a->ctrl != PFM_ATTR_CTRL_PMU || a->type != PFM_ATTR_UMASK) |
| continue; |
| |
| numasks++; |
| if (intel_x86_uflag(e->pmu, e->event, a->idx, INTEL_X86_PEBS)) |
| npebs++; |
| } |
| return npebs == numasks ? PFM_SUCCESS : PFM_ERR_FEATCOMB; |
| } |
| |
| unsigned int |
| pfm_intel_x86_get_event_nattrs(void *this, int pidx) |
| { |
| unsigned int nattrs; |
| nattrs = intel_x86_num_umasks(this, pidx); |
| nattrs += intel_x86_num_mods(this, pidx); |
| return nattrs; |
| } |
| |
| int |
| pfm_intel_x86_can_auto_encode(void *this, int pidx, int uidx) |
| { |
| int numasks; |
| |
| if (intel_x86_eflag(this, pidx, INTEL_X86_NO_AUTOENCODE)) |
| return 0; |
| |
| numasks = intel_x86_num_umasks(this, pidx); |
| if (uidx >= numasks) |
| return 0; |
| |
| return !intel_x86_uflag(this, pidx, uidx, INTEL_X86_NO_AUTOENCODE); |
| } |