|
|
<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>perf - Notes</title>
<!-- Custom HTML head -->
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" href="../highlight.css">
<link rel="stylesheet" href="../tomorrow-night.css">
<link rel="stylesheet" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root to javascript -->
<script>
var path_to_root = "../";
var default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "navy" : "light";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
try {
var theme = localStorage.getItem('mdbook-theme');
var sidebar = localStorage.getItem('mdbook-sidebar');
if (theme.startsWith('"') && theme.endsWith('"')) {
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
}
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
}
} catch (e) { }
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
var theme;
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
if (theme === null || theme === undefined) { theme = default_theme; }
const html = document.documentElement;
html.classList.remove('light')
html.classList.add(theme);
html.classList.add("js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
var sidebar = null;
var sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth >= 1080) {
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
sidebar = sidebar || 'visible';
} else {
sidebar = 'hidden';
}
sidebar_toggle.checked = sidebar === 'visible';
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">Notes</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/johannst/notes" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
});
</script>
<div id="content" class="content">
<main>
<h1 id="perf1"><a class="header" href="#perf1">perf(1)</a></h1>
<pre><code>perf list show supported hw/sw events & metrics
-v ........ print longer event descriptions
--details . print information on the perf event names
and expressions used internally by events
perf stat
-p <pid> ..... show stats for running process
-o <file> .... write output to file (default stderr)
-I <ms> ...... show stats periodically over interval <ms>
-e <ev> ...... select event(s)
-M <met> ..... print metric(s), this adds the metric events
--all-user ... configure all selected events for user space
--all-kernel . configure all selected events for kernel space
perf top
-p <pid> .. show stats for running process
-F <hz> ... sampling frequency
-K ........ hide kernel threads
perf record
-p <pid> ............... record stats for running process
-o <file> .............. write output to file (default perf.data)
-F <hz> ................ sampling frequency
--call-graph <method> .. [fp, dwarf, lbr] method how to caputre backtrace
fp : use frame-pointer, need to compile with
-fno-omit-frame-pointer
dwarf: use .cfi debug information
lbr : use hardware last branch record facility
-g ..................... short-hand for --call-graph fp
-e <ev> ................ select event(s)
--all-user ............. configure all selected events for user space
--all-kernel ........... configure all selected events for kernel space
-M intel ............... use intel disassembly in annotate
perf report
-n .................... annotate symbols with nr of samples
--stdio ............... report to stdio, if not presen tui mode
-g graph,0.5,callee ... show callee based call chains with value >0.5
</code></pre>
<pre><code>Useful <ev>:
page-faults
minor-faults
major-faults
cpu-cycles`
task-clock
</code></pre>
<h2 id="select-specific-events"><a class="header" href="#select-specific-events">Select specific events</a></h2>
<p>Events to sample are specified with the <code>-e</code> option, either pass a comma
separated list or pass <code>-e</code> multiple times.</p>
<p>Events are specified in the following form <code>name[:modifier]</code>. The list and
description of the <code>modifier</code> can be found in the
<a href="https://man7.org/linux/man-pages/man1/perf-list.1.html"><code>perf-list(1)</code></a> manpage under <code>EVENT MODIFIERS</code>.</p>
<pre><code class="language-sh"># L1 i$ misses in user space
# L2 i$ stats in user/kernel space mixed
# Sample specified events.
perf stat -e L1-icache-load-misses:u \
-e l2_rqsts.all_code_rd:uk,l2_rqsts.code_rd_hit:k,l2_rqsts.code_rd_miss:k \
-- stress -c 2
</code></pre>
<p>The <code>--all-user</code> and <code>--all-kernel</code> options append a <code>:u</code> and <code>:k</code> modifier to
all specified events. Therefore the following two command lines are equivalent.</p>
<pre><code class="language-sh"># 1)
perf stat -e cycles:u,instructions:u -- ls
# 2)
perf stat --all-user -e cycles,instructions -- ls
</code></pre>
<h3 id="raw-events"><a class="header" href="#raw-events">Raw events</a></h3>
<p>In case perf does not provide a <em>symbolic</em> name for an event, the event can be
specified in a <em>raw</em> form as <code>r + UMask + EventCode</code>.</p>
<p>The following is an example for the <a href="https://github.com/intel/perfmon/blob/09c155f72e1b8f14b09aea346a35467a03a7d62b/SNB/events/sandybridge_core.json#L808">L2_RQSTS.CODE_RD_HIT</a> event
with <code>EventCode=0x24</code> and <code>UMask=0x10</code> on my laptop with a <code>sandybridge</code> uarch.</p>
<pre><code class="language-sh">perf stat -e l2_rqsts.code_rd_hit -e r1024 -- ls
# Performance counter stats for 'ls':
#
# 33.942 l2_rqsts.code_rd_hit
# 33.942 r1024
</code></pre>
<h3 id="find-raw-performance-counter-events-intel"><a class="header" href="#find-raw-performance-counter-events-intel">Find raw performance counter events (intel)</a></h3>
<p>The <a href="https://github.com/intel/perfmon"><code>intel/perfmon</code></a> repository provides a performance event
databases for the different intel uarchs.</p>
<p>The table in <a href="https://github.com/intel/perfmon/blob/main/mapfile.csv"><code>mapfile.csv</code></a> can be used to lookup the
corresponding uarch, just grab the family model from the procfs.</p>
<pre><code class="language-sh"> cat /proc/cpuinfo | awk '/^vendor_id/ { V=$3 }
/^cpu family/ { F=$4 }
/^model\s*:/ { printf "%s-%d-%x\n",V,F,$3 }'
</code></pre>
<blockquote>
<p>The table in <a href="https://github.com/intel/perfmon/tree/main#performance-monitoring-events">performance monitoring events</a> describes how
events are sorted into the different files.</p>
</blockquote>
<h3 id="raw-events-for-perfs-own-symbolic-names"><a class="header" href="#raw-events-for-perfs-own-symbolic-names">Raw events for perfs own symbolic names</a></h3>
<p>Perf also defines some own <em>symbolic</em> names for events. An example is the
<code>cache-references</code> event. The <a href="https://man7.org/linux/man-pages/man2/perf_event_open.2.html"><code>perf_event_open(2)</code></a> manpage
gives the following description.</p>
<pre><code class="language-man">perf_event_open(2)
PERF_COUNT_HW_CACHE_REFERENCES
Cache accesses. Usually this indicates Last Level Cache accesses but this
may vary depending on your CPU. This may include prefetches and coherency
messages; again this depends on the design of your CPU.
</code></pre>
<p>The <code>sysfs</code> can be consulted to get the concrete performance counter on the
given system.</p>
<pre><code class="language-sh">cat /sys/devices/cpu/events/cache-misses
# event=0x2e,umask=0x41
</code></pre>
<h2 id="flamegraph"><a class="header" href="#flamegraph"><a href="https://github.com/brendangregg/FlameGraph"><code>Flamegraph</code></a></a></h2>
<h3 id="flamegraph-with-single-event-trace"><a class="header" href="#flamegraph-with-single-event-trace">Flamegraph with single event trace</a></h3>
<pre><code>perf record -g -e cpu-cycles -p <pid>
perf script | FlameGraph/stackcollapse-perf.pl | FlameGraph/flamegraph.pl > cycles-flamegraph.svg
</code></pre>
<h3 id="flamegraph-with-multiple-event-traces"><a class="header" href="#flamegraph-with-multiple-event-traces">Flamegraph with multiple event traces</a></h3>
<pre><code class="language-sh">perf record -g -e cpu-cycles,page-faults -p <pid>
perf script --per-event-dump
# fold & generate as above
</code></pre>
<h2 id="examples"><a class="header" href="#examples">Examples</a></h2>
<h3 id="estimate-max-instructions-per-cycle"><a class="header" href="#estimate-max-instructions-per-cycle">Estimate max instructions per cycle</a></h3>
<pre><code class="language-c">#define NOP4 "nop\nnop\nnop\nnop\n"
#define NOP32 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4
#define NOP256 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32
#define NOP2048 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256
int main() {
for (unsigned i = 0; i < 2000000; ++i) {
asm volatile(NOP2048);
}
}
</code></pre>
<pre><code class="language-sh">perf stat -e cycles,instructions ./noploop
# Performance counter stats for './noploop':
#
# 1.031.075.940 cycles
# 4.103.534.341 instructions # 3,98 insn per cycle
</code></pre>
<h3 id="caller-vs-callee-callstacks"><a class="header" href="#caller-vs-callee-callstacks">Caller vs callee callstacks</a></h3>
<p>The following gives an example for a scenario where we have the following calls</p>
<ul>
<li><code>main -> do_foo() -> do_work()</code></li>
<li><code>main -> do_bar() -> do_work()</code></li>
</ul>
<pre><code class="language-sh">perf report --stdio -g graph,caller
# Children Self Command Shared Object Symbols
# ........ ........ ....... .................... .................
#
# 49.71% 49.66% bench bench [.] do_work
# |
# --49.66%--_start <- callstack bottom
# __libc_start_main
# 0x7ff366c62ccf
# main
# |
# |--25.13%--do_bar
# | do_work <- callstack top
# |
# --24.53%--do_foo
# do_work
perf report --stdio -g graph,callee
# Children Self Command Shared Object Symbols
# ........ ........ ....... .................... .................
#
# 49.71% 49.66% bench bench [.] do_work
# |
# ---do_work <- callstack top
# |
# |--25.15%--do_bar
# | main
# | 0x7ff366c62ccf
# | __libc_start_main
# | _start <- callstack bottom
# |
# --24.55%--do_foo
# main
# 0x7ff366c62ccf
# __libc_start_main
# _start <- callstack bottom
</code></pre>
<h2 id="references"><a class="header" href="#references">References</a></h2>
<ul>
<li><a href="https://github.com/intel/perfmon">intel/perfmon</a> - intel PMU event database per uarch</li>
<li><a href="https://perfmon-events.intel.com/">intel/perfmon-html</a> - a html rendered version of the PMU
events with search</li>
<li><a href="https://github.com/intel/perfmon/blob/main/mapfile.csv">intel/perfmon/mapfile.csv</a> - processor family to uarch mapping</li>
<li><a href="https://github.com/torvalds/linux/tree/master/tools/perf/pmu-events/arch/x86">linux/perf/events</a> - x86 PMU events known to perf tools</li>
<li><a href="https://github.com/torvalds/linux/blob/master/arch/x86/events/intel/core.c">linux/arch/events</a> - x86 PMU events linux kernel</li>
<li><a href="https://en.wikichip.org/wiki/WikiChip">wikichip</a> - computer architecture wiki</li>
<li><a href="https://man7.org/linux/man-pages/man1/perf-list.1.html">perf-list(1)</a> - manpage</li>
<li><a href="https://man7.org/linux/man-pages/man2/perf_event_open.2.html">perf_event_open(2)</a> - manpage</li>
<li><a href="https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html">intel/sdm</a> - intel software developer manuals (eg Optimization
Reference Manual)</li>
</ul>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../trace_profile/ltrace.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../trace_profile/oprofile.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../trace_profile/ltrace.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../trace_profile/oprofile.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script>
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
</div>
</body>
</html>
|