aboutsummaryrefslogtreecommitdiffhomepage
path: root/trace_profile/perf.html
diff options
context:
space:
mode:
authorjohannst <johannst@users.noreply.github.com>2024-01-27 01:27:09 +0000
committerjohannst <johannst@users.noreply.github.com>2024-01-27 01:27:09 +0000
commite5b4aacc5d30dedc78fe955de6ab340374ca2920 (patch)
tree2f358c4624067dc0d0c78dcc33a8866b7e5ffc35 /trace_profile/perf.html
parent920d16436e0d5f37e9f6d31236ee87b716f3c82f (diff)
downloadnotes-e5b4aacc5d30dedc78fe955de6ab340374ca2920.tar.gz
notes-e5b4aacc5d30dedc78fe955de6ab340374ca2920.zip
deploy: c649f69634104c80d5da8af20871646e7d06aab1
Diffstat (limited to 'trace_profile/perf.html')
-rw-r--r--trace_profile/perf.html164
1 files changed, 154 insertions, 10 deletions
diff --git a/trace_profile/perf.html b/trace_profile/perf.html
index f4d68a2..aede7d4 100644
--- a/trace_profile/perf.html
+++ b/trace_profile/perf.html
@@ -88,7 +88,7 @@
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<div class="sidebar-scrollbox">
- <ol class="chapter"><li class="chapter-item expanded affix "><a href="../intro.html">Introduction</a></li><li class="chapter-item expanded "><a href="../tools/index.html"><strong aria-hidden="true">1.</strong> Tools</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../tools/zsh.html"><strong aria-hidden="true">1.1.</strong> zsh</a></li><li class="chapter-item expanded "><a href="../tools/bash.html"><strong aria-hidden="true">1.2.</strong> bash</a></li><li class="chapter-item expanded "><a href="../tools/fish.html"><strong aria-hidden="true">1.3.</strong> fish</a></li><li class="chapter-item expanded "><a href="../tools/tmux.html"><strong aria-hidden="true">1.4.</strong> tmux</a></li><li class="chapter-item expanded "><a href="../tools/git.html"><strong aria-hidden="true">1.5.</strong> git</a></li><li class="chapter-item expanded "><a href="../tools/awk.html"><strong aria-hidden="true">1.6.</strong> awk</a></li><li class="chapter-item expanded "><a href="../tools/emacs.html"><strong aria-hidden="true">1.7.</strong> emacs</a></li><li class="chapter-item expanded "><a href="../tools/gpg.html"><strong aria-hidden="true">1.8.</strong> gpg</a></li><li class="chapter-item expanded "><a href="../tools/gdb.html"><strong aria-hidden="true">1.9.</strong> gdb</a></li><li class="chapter-item expanded "><a href="../tools/gdbserver.html"><strong aria-hidden="true">1.10.</strong> gdbserver</a></li><li class="chapter-item expanded "><a href="../tools/radare2.html"><strong aria-hidden="true">1.11.</strong> radare2</a></li><li class="chapter-item expanded "><a href="../tools/qemu.html"><strong aria-hidden="true">1.12.</strong> qemu</a></li><li class="chapter-item expanded "><a href="../tools/pacman.html"><strong aria-hidden="true">1.13.</strong> pacman</a></li><li class="chapter-item expanded "><a href="../tools/dot.html"><strong aria-hidden="true">1.14.</strong> dot</a></li><li class="chapter-item expanded "><a href="../tools/ffmpeg.html"><strong 
aria-hidden="true">1.15.</strong> ffmpeg</a></li><li class="chapter-item expanded "><a href="../tools/column.html"><strong aria-hidden="true">1.16.</strong> column</a></li><li class="chapter-item expanded "><a href="../tools/sort.html"><strong aria-hidden="true">1.17.</strong> sort</a></li><li class="chapter-item expanded "><a href="../tools/sed.html"><strong aria-hidden="true">1.18.</strong> sed</a></li></ol></li><li class="chapter-item expanded "><a href="../monitor/index.html"><strong aria-hidden="true">2.</strong> Resource analysis & monitor</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../monitor/lsof.html"><strong aria-hidden="true">2.1.</strong> lsof</a></li><li class="chapter-item expanded "><a href="../monitor/ss.html"><strong aria-hidden="true">2.2.</strong> ss</a></li><li class="chapter-item expanded "><a href="../monitor/pidstat.html"><strong aria-hidden="true">2.3.</strong> pidstat</a></li><li class="chapter-item expanded "><a href="../monitor/pgrep.html"><strong aria-hidden="true">2.4.</strong> pgrep</a></li><li class="chapter-item expanded "><a href="../monitor/pmap.html"><strong aria-hidden="true">2.5.</strong> pmap</a></li><li class="chapter-item expanded "><a href="../monitor/pstack.html"><strong aria-hidden="true">2.6.</strong> pstack</a></li></ol></li><li class="chapter-item expanded "><a href="../trace_profile/index.html"><strong aria-hidden="true">3.</strong> Trace and Profile</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../trace_profile/time.html"><strong aria-hidden="true">3.1.</strong> time</a></li><li class="chapter-item expanded "><a href="../trace_profile/strace.html"><strong aria-hidden="true">3.2.</strong> strace</a></li><li class="chapter-item expanded "><a href="../trace_profile/ltrace.html"><strong aria-hidden="true">3.3.</strong> ltrace</a></li><li class="chapter-item expanded "><a href="../trace_profile/perf.html" class="active"><strong aria-hidden="true">3.4.</strong> 
perf</a></li><li class="chapter-item expanded "><a href="../trace_profile/oprofile.html"><strong aria-hidden="true">3.5.</strong> OProfile</a></li><li class="chapter-item expanded "><a href="../trace_profile/callgrind.html"><strong aria-hidden="true">3.6.</strong> callgrind</a></li></ol></li><li class="chapter-item expanded "><a href="../binary/index.html"><strong aria-hidden="true">4.</strong> Binary</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../binary/od.html"><strong aria-hidden="true">4.1.</strong> od</a></li><li class="chapter-item expanded "><a href="../binary/xxd.html"><strong aria-hidden="true">4.2.</strong> xxd</a></li><li class="chapter-item expanded "><a href="../binary/readelf.html"><strong aria-hidden="true">4.3.</strong> readelf</a></li><li class="chapter-item expanded "><a href="../binary/objdump.html"><strong aria-hidden="true">4.4.</strong> objdump</a></li><li class="chapter-item expanded "><a href="../binary/nm.html"><strong aria-hidden="true">4.5.</strong> nm</a></li></ol></li><li class="chapter-item expanded "><a href="../development/index.html"><strong aria-hidden="true">5.</strong> Development</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../development/c++filt.html"><strong aria-hidden="true">5.1.</strong> c++filt</a></li><li class="chapter-item expanded "><a href="../development/c++.html"><strong aria-hidden="true">5.2.</strong> c++</a></li><li class="chapter-item expanded "><a href="../development/glibc.html"><strong aria-hidden="true">5.3.</strong> glibc</a></li><li class="chapter-item expanded "><a href="../development/gcc.html"><strong aria-hidden="true">5.4.</strong> gcc</a></li><li class="chapter-item expanded "><a href="../development/cmake.html"><strong aria-hidden="true">5.5.</strong> cmake</a></li><li class="chapter-item expanded "><a href="../development/make.html"><strong aria-hidden="true">5.6.</strong> make</a></li><li class="chapter-item expanded "><a 
href="../development/ld.so.html"><strong aria-hidden="true">5.7.</strong> ld.so</a></li><li class="chapter-item expanded "><a href="../development/symbolver.html"><strong aria-hidden="true">5.8.</strong> symbol versioning</a></li><li class="chapter-item expanded "><a href="../development/python.html"><strong aria-hidden="true">5.9.</strong> python</a></li><li class="chapter-item expanded "><a href="../development/gcov.html"><strong aria-hidden="true">5.10.</strong> gcov</a></li><li class="chapter-item expanded "><a href="../development/pgo.html"><strong aria-hidden="true">5.11.</strong> pgo</a></li></ol></li><li class="chapter-item expanded "><a href="../linux/index.html"><strong aria-hidden="true">6.</strong> Linux</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../linux/systemd.html"><strong aria-hidden="true">6.1.</strong> systemd</a></li><li class="chapter-item expanded "><a href="../linux/coredump.html"><strong aria-hidden="true">6.2.</strong> coredump</a></li><li class="chapter-item expanded "><a href="../linux/ptrace_scope.html"><strong aria-hidden="true">6.3.</strong> ptrace_scope</a></li><li class="chapter-item expanded "><a href="../linux/cryptsetup.html"><strong aria-hidden="true">6.4.</strong> cryptsetup</a></li><li class="chapter-item expanded "><a href="../linux/swap.html"><strong aria-hidden="true">6.5.</strong> swap</a></li><li class="chapter-item expanded "><a href="../linux/input.html"><strong aria-hidden="true">6.6.</strong> input</a></li><li class="chapter-item expanded "><a href="../linux/acl.html"><strong aria-hidden="true">6.7.</strong> acl</a></li><li class="chapter-item expanded "><a href="../linux/zfs.html"><strong aria-hidden="true">6.8.</strong> zfs</a></li></ol></li><li class="chapter-item expanded "><a href="../network/index.html"><strong aria-hidden="true">7.</strong> Network</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../network/tcpdump.html"><strong 
aria-hidden="true">7.1.</strong> tcpdump</a></li><li class="chapter-item expanded "><a href="../network/tshark.html"><strong aria-hidden="true">7.2.</strong> tshark</a></li><li class="chapter-item expanded "><a href="../network/firewall-cmd.html"><strong aria-hidden="true">7.3.</strong> firewall-cmd</a></li><li class="chapter-item expanded "><a href="../network/nftables.html"><strong aria-hidden="true">7.4.</strong> nftables</a></li></ol></li><li class="chapter-item expanded "><a href="../web/index.html"><strong aria-hidden="true">8.</strong> Web</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../web/html.html"><strong aria-hidden="true">8.1.</strong> html</a></li><li class="chapter-item expanded "><a href="../web/css.html"><strong aria-hidden="true">8.2.</strong> css</a></li><li class="chapter-item expanded "><a href="../web/chartjs.html"><strong aria-hidden="true">8.3.</strong> chartjs</a></li></ol></li><li class="chapter-item expanded "><a href="../arch/index.html"><strong aria-hidden="true">9.</strong> Arch</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../arch/x86_64.html"><strong aria-hidden="true">9.1.</strong> x86_64</a></li><li class="chapter-item expanded "><a href="../arch/arm64.html"><strong aria-hidden="true">9.2.</strong> arm64</a></li><li class="chapter-item expanded "><a href="../arch/armv7.html"><strong aria-hidden="true">9.3.</strong> armv7</a></li><li class="chapter-item expanded "><a href="../arch/riscv.html"><strong aria-hidden="true">9.4.</strong> riscv</a></li></ol></li></ol>
+ <ol class="chapter"><li class="chapter-item expanded affix "><a href="../intro.html">Introduction</a></li><li class="chapter-item expanded "><a href="../tools/index.html"><strong aria-hidden="true">1.</strong> Tools</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../tools/zsh.html"><strong aria-hidden="true">1.1.</strong> zsh</a></li><li class="chapter-item expanded "><a href="../tools/bash.html"><strong aria-hidden="true">1.2.</strong> bash</a></li><li class="chapter-item expanded "><a href="../tools/fish.html"><strong aria-hidden="true">1.3.</strong> fish</a></li><li class="chapter-item expanded "><a href="../tools/tmux.html"><strong aria-hidden="true">1.4.</strong> tmux</a></li><li class="chapter-item expanded "><a href="../tools/git.html"><strong aria-hidden="true">1.5.</strong> git</a></li><li class="chapter-item expanded "><a href="../tools/awk.html"><strong aria-hidden="true">1.6.</strong> awk</a></li><li class="chapter-item expanded "><a href="../tools/emacs.html"><strong aria-hidden="true">1.7.</strong> emacs</a></li><li class="chapter-item expanded "><a href="../tools/gpg.html"><strong aria-hidden="true">1.8.</strong> gpg</a></li><li class="chapter-item expanded "><a href="../tools/gdb.html"><strong aria-hidden="true">1.9.</strong> gdb</a></li><li class="chapter-item expanded "><a href="../tools/gdbserver.html"><strong aria-hidden="true">1.10.</strong> gdbserver</a></li><li class="chapter-item expanded "><a href="../tools/radare2.html"><strong aria-hidden="true">1.11.</strong> radare2</a></li><li class="chapter-item expanded "><a href="../tools/qemu.html"><strong aria-hidden="true">1.12.</strong> qemu</a></li><li class="chapter-item expanded "><a href="../tools/pacman.html"><strong aria-hidden="true">1.13.</strong> pacman</a></li><li class="chapter-item expanded "><a href="../tools/dot.html"><strong aria-hidden="true">1.14.</strong> dot</a></li><li class="chapter-item expanded "><a href="../tools/ffmpeg.html"><strong 
aria-hidden="true">1.15.</strong> ffmpeg</a></li><li class="chapter-item expanded "><a href="../tools/column.html"><strong aria-hidden="true">1.16.</strong> column</a></li><li class="chapter-item expanded "><a href="../tools/sort.html"><strong aria-hidden="true">1.17.</strong> sort</a></li><li class="chapter-item expanded "><a href="../tools/sed.html"><strong aria-hidden="true">1.18.</strong> sed</a></li></ol></li><li class="chapter-item expanded "><a href="../monitor/index.html"><strong aria-hidden="true">2.</strong> Resource analysis & monitor</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../monitor/lsof.html"><strong aria-hidden="true">2.1.</strong> lsof</a></li><li class="chapter-item expanded "><a href="../monitor/ss.html"><strong aria-hidden="true">2.2.</strong> ss</a></li><li class="chapter-item expanded "><a href="../monitor/pidstat.html"><strong aria-hidden="true">2.3.</strong> pidstat</a></li><li class="chapter-item expanded "><a href="../monitor/pgrep.html"><strong aria-hidden="true">2.4.</strong> pgrep</a></li><li class="chapter-item expanded "><a href="../monitor/ps.html"><strong aria-hidden="true">2.5.</strong> ps</a></li><li class="chapter-item expanded "><a href="../monitor/pmap.html"><strong aria-hidden="true">2.6.</strong> pmap</a></li><li class="chapter-item expanded "><a href="../monitor/pstack.html"><strong aria-hidden="true">2.7.</strong> pstack</a></li></ol></li><li class="chapter-item expanded "><a href="../trace_profile/index.html"><strong aria-hidden="true">3.</strong> Trace and Profile</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../trace_profile/time.html"><strong aria-hidden="true">3.1.</strong> time</a></li><li class="chapter-item expanded "><a href="../trace_profile/strace.html"><strong aria-hidden="true">3.2.</strong> strace</a></li><li class="chapter-item expanded "><a href="../trace_profile/ltrace.html"><strong aria-hidden="true">3.3.</strong> ltrace</a></li><li 
class="chapter-item expanded "><a href="../trace_profile/perf.html" class="active"><strong aria-hidden="true">3.4.</strong> perf</a></li><li class="chapter-item expanded "><a href="../trace_profile/oprofile.html"><strong aria-hidden="true">3.5.</strong> OProfile</a></li><li class="chapter-item expanded "><a href="../trace_profile/callgrind.html"><strong aria-hidden="true">3.6.</strong> callgrind</a></li></ol></li><li class="chapter-item expanded "><a href="../binary/index.html"><strong aria-hidden="true">4.</strong> Binary</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../binary/od.html"><strong aria-hidden="true">4.1.</strong> od</a></li><li class="chapter-item expanded "><a href="../binary/xxd.html"><strong aria-hidden="true">4.2.</strong> xxd</a></li><li class="chapter-item expanded "><a href="../binary/readelf.html"><strong aria-hidden="true">4.3.</strong> readelf</a></li><li class="chapter-item expanded "><a href="../binary/objdump.html"><strong aria-hidden="true">4.4.</strong> objdump</a></li><li class="chapter-item expanded "><a href="../binary/nm.html"><strong aria-hidden="true">4.5.</strong> nm</a></li></ol></li><li class="chapter-item expanded "><a href="../development/index.html"><strong aria-hidden="true">5.</strong> Development</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../development/c++filt.html"><strong aria-hidden="true">5.1.</strong> c++filt</a></li><li class="chapter-item expanded "><a href="../development/c++.html"><strong aria-hidden="true">5.2.</strong> c++</a></li><li class="chapter-item expanded "><a href="../development/glibc.html"><strong aria-hidden="true">5.3.</strong> glibc</a></li><li class="chapter-item expanded "><a href="../development/gcc.html"><strong aria-hidden="true">5.4.</strong> gcc</a></li><li class="chapter-item expanded "><a href="../development/cmake.html"><strong aria-hidden="true">5.5.</strong> cmake</a></li><li class="chapter-item expanded "><a 
href="../development/make.html"><strong aria-hidden="true">5.6.</strong> make</a></li><li class="chapter-item expanded "><a href="../development/ld.so.html"><strong aria-hidden="true">5.7.</strong> ld.so</a></li><li class="chapter-item expanded "><a href="../development/symbolver.html"><strong aria-hidden="true">5.8.</strong> symbol versioning</a></li><li class="chapter-item expanded "><a href="../development/python.html"><strong aria-hidden="true">5.9.</strong> python</a></li><li class="chapter-item expanded "><a href="../development/gcov.html"><strong aria-hidden="true">5.10.</strong> gcov</a></li><li class="chapter-item expanded "><a href="../development/pgo.html"><strong aria-hidden="true">5.11.</strong> pgo</a></li></ol></li><li class="chapter-item expanded "><a href="../linux/index.html"><strong aria-hidden="true">6.</strong> Linux</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../linux/systemd.html"><strong aria-hidden="true">6.1.</strong> systemd</a></li><li class="chapter-item expanded "><a href="../linux/coredump.html"><strong aria-hidden="true">6.2.</strong> coredump</a></li><li class="chapter-item expanded "><a href="../linux/ptrace_scope.html"><strong aria-hidden="true">6.3.</strong> ptrace_scope</a></li><li class="chapter-item expanded "><a href="../linux/cryptsetup.html"><strong aria-hidden="true">6.4.</strong> cryptsetup</a></li><li class="chapter-item expanded "><a href="../linux/swap.html"><strong aria-hidden="true">6.5.</strong> swap</a></li><li class="chapter-item expanded "><a href="../linux/input.html"><strong aria-hidden="true">6.6.</strong> input</a></li><li class="chapter-item expanded "><a href="../linux/acl.html"><strong aria-hidden="true">6.7.</strong> acl</a></li><li class="chapter-item expanded "><a href="../linux/zfs.html"><strong aria-hidden="true">6.8.</strong> zfs</a></li></ol></li><li class="chapter-item expanded "><a href="../network/index.html"><strong aria-hidden="true">7.</strong> 
Network</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../network/tcpdump.html"><strong aria-hidden="true">7.1.</strong> tcpdump</a></li><li class="chapter-item expanded "><a href="../network/tshark.html"><strong aria-hidden="true">7.2.</strong> tshark</a></li><li class="chapter-item expanded "><a href="../network/firewall-cmd.html"><strong aria-hidden="true">7.3.</strong> firewall-cmd</a></li><li class="chapter-item expanded "><a href="../network/nftables.html"><strong aria-hidden="true">7.4.</strong> nftables</a></li></ol></li><li class="chapter-item expanded "><a href="../web/index.html"><strong aria-hidden="true">8.</strong> Web</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../web/html.html"><strong aria-hidden="true">8.1.</strong> html</a></li><li class="chapter-item expanded "><a href="../web/css.html"><strong aria-hidden="true">8.2.</strong> css</a></li><li class="chapter-item expanded "><a href="../web/chartjs.html"><strong aria-hidden="true">8.3.</strong> chartjs</a></li></ol></li><li class="chapter-item expanded "><a href="../arch/index.html"><strong aria-hidden="true">9.</strong> Arch</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../arch/x86_64.html"><strong aria-hidden="true">9.1.</strong> x86_64</a></li><li class="chapter-item expanded "><a href="../arch/arm64.html"><strong aria-hidden="true">9.2.</strong> arm64</a></li><li class="chapter-item expanded "><a href="../arch/armv7.html"><strong aria-hidden="true">9.3.</strong> armv7</a></li><li class="chapter-item expanded "><a href="../arch/riscv.html"><strong aria-hidden="true">9.4.</strong> riscv</a></li></ol></li></ol>
</div>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
@@ -177,12 +177,18 @@
<div id="content" class="content">
<main>
<h1 id="perf1"><a class="header" href="#perf1">perf(1)</a></h1>
-<pre><code class="language-markdown">perf list show supported hw/sw events
+<pre><code>perf list show supported hw/sw events &amp; metrics
+ -v ........ print longer event descriptions
+ --details . print information on the perf event names
+ and expressions used internally by events
perf stat
- -p &lt;pid&gt; .. show stats for running process
- -I &lt;ms&gt; ... show stats periodically over interval &lt;ms&gt;
- -e &lt;ev&gt; ... filter for events
+ -p &lt;pid&gt; ..... show stats for running process
+ -I &lt;ms&gt; ...... show stats periodically over interval &lt;ms&gt;
+ -e &lt;ev&gt; ...... select event(s)
+ -M &lt;met&gt; ..... print metric(s), this adds the metric events
+ --all-user ... configure all selected events for user space
+ --all-kernel . configure all selected events for kernel space
perf top
-p &lt;pid&gt; .. show stats for running process
@@ -198,30 +204,168 @@ perf record
dwarf: use .cfi debug information
lbr : use hardware last branch record facility
-g ..................... short-hand for --call-graph fp
- -e &lt;ev&gt; ................ filter for events
+ -e &lt;ev&gt; ................ select event(s)
+ --all-user ... configure all selected events for user space
+ --all-kernel . configure all selected events for kernel space
perf report
-n .................... annotate symbols with nr of samples
--stdio ............... report to stdio, if not present tui mode
- -g graph,0.5,caller ... show caller based call chains with value &gt;0.5
+ -g graph,0.5,callee ... show callee based call chains with value &gt;0.5
</code></pre>
-<pre><code class="language-markdown">Useful &lt;ev&gt;:
+<pre><code>Useful &lt;ev&gt;:
page-faults
minor-faults
major-faults
cpu-cycles
task-clock
</code></pre>
+<h2 id="select-specific-events"><a class="header" href="#select-specific-events">Select specific events</a></h2>
+<p>Events to sample are specified with the <code>-e</code> option. Either pass a
+comma-separated list or pass <code>-e</code> multiple times.</p>
+<p>Events are specified in the following form <code>name[:modifier]</code>. The list and
+description of the <code>modifier</code> can be found in the
+<a href="https://man7.org/linux/man-pages/man1/perf-list.1.html"><code>perf-list(1)</code></a> manpage under <code>EVENT MODIFIERS</code>.</p>
+<pre><code class="language-sh"># L1 i$ misses in user space
+# L2 i$ stats in user/kernel space mixed
+# Sample specified events.
+perf stat -e L1-icache-load-misses:u \
+ -e l2_rqsts.all_code_rd:uk,l2_rqsts.code_rd_hit:k,l2_rqsts.code_rd_miss:k \
+ -- stress -c 2
+</code></pre>
+<p>The <code>--all-user</code> and <code>--all-kernel</code> options append a <code>:u</code> or <code>:k</code> modifier, respectively, to
+all specified events. Therefore the following two command lines are equivalent.</p>
+<pre><code class="language-sh"># 1)
+perf stat -e cycles:u,instructions:u -- ls
+
+# 2)
+perf stat --all-user -e cycles,instructions -- ls
+</code></pre>
+<h3 id="raw-events"><a class="header" href="#raw-events">Raw events</a></h3>
+<p>In case perf does not provide a <em>symbolic</em> name for an event, the event can be
+specified in a <em>raw</em> form as <code>r + UMask + EventCode</code>.</p>
+<p>The following is an example for the <a href="https://github.com/intel/perfmon/blob/09c155f72e1b8f14b09aea346a35467a03a7d62b/SNB/events/sandybridge_core.json#L808">L2_RQSTS.CODE_RD_HIT</a> event
+with <code>EventCode=0x24</code> and <code>UMask=0x10</code> on my laptop with a <code>sandybridge</code> uarch.</p>
+<pre><code class="language-sh">perf stat -e l2_rqsts.code_rd_hit -e r1024 -- ls
+# Performance counter stats for 'ls':
+#
+# 33.942 l2_rqsts.code_rd_hit
+# 33.942 r1024
+</code></pre>
+<h3 id="find-raw-performance-counter-events-intel"><a class="header" href="#find-raw-performance-counter-events-intel">Find raw performance counter events (intel)</a></h3>
+<p>The <a href="https://github.com/intel/perfmon"><code>intel/perfmon</code></a> repository provides performance event
+databases for the different intel uarchs.</p>
+<p>The table in <a href="https://github.com/intel/perfmon/blob/main/mapfile.csv"><code>mapfile.csv</code></a> can be used to look up the
+corresponding uarch; just grab the cpu family and model from the procfs.</p>
+<pre><code class="language-sh"> cat /proc/cpuinfo | awk '/^vendor_id/ { V=$3 }
+ /^cpu family/ { F=$4 }
+ /^model\s*:/ { printf &quot;%s-%d-%x\n&quot;,V,F,$3 }'
+</code></pre>
+<blockquote>
+<p>The table in <a href="https://github.com/intel/perfmon/tree/main#performance-monitoring-events">performance monitoring events</a> describes how
+events are sorted into the different files.</p>
+</blockquote>
+<h3 id="raw-events-for-perfs-own-symbolic-names"><a class="header" href="#raw-events-for-perfs-own-symbolic-names">Raw events for perf's own symbolic names</a></h3>
+<p>Perf also defines some <em>symbolic</em> names of its own for events. An example is the
+<code>cache-references</code> event. The <a href="https://man7.org/linux/man-pages/man2/perf_event_open.2.html"><code>perf_event_open(2)</code></a> manpage
+gives the following description.</p>
+<pre><code class="language-man">perf_event_open(2)
+
+PERF_COUNT_HW_CACHE_REFERENCES
+ Cache accesses. Usually this indicates Last Level Cache accesses but this
+ may vary depending on your CPU. This may include prefetches and coherency
+ messages; again this depends on the design of your CPU.
+</code></pre>
+<p>The <code>sysfs</code> can be consulted to get the concrete performance counter on the
+given system.</p>
+<pre><code class="language-sh">cat /sys/devices/cpu/events/cache-references
+# event=0x2e,umask=0x4f
+</code></pre>
<h2 id="flamegraph"><a class="header" href="#flamegraph"><a href="https://github.com/brendangregg/FlameGraph"><code>Flamegraph</code></a></a></h2>
<h3 id="flamegraph-with-single-event-trace"><a class="header" href="#flamegraph-with-single-event-trace">Flamegraph with single event trace</a></h3>
-<pre><code class="language-markdown">perf record -g -e cpu-cycles -p &lt;pid&gt;
+<pre><code>perf record -g -e cpu-cycles -p &lt;pid&gt;
perf script | FlameGraph/stackcollapse-perf.pl | FlameGraph/flamegraph.pl &gt; cycles-flamegraph.svg
</code></pre>
<h3 id="flamegraph-with-multiple-event-traces"><a class="header" href="#flamegraph-with-multiple-event-traces">Flamegraph with multiple event traces</a></h3>
-<pre><code class="language-markdown">perf record -g -e cpu-cycles,page-faults -p &lt;pid&gt;
+<pre><code class="language-sh">perf record -g -e cpu-cycles,page-faults -p &lt;pid&gt;
perf script --per-event-dump
# fold &amp; generate as above
</code></pre>
+<h2 id="examples"><a class="header" href="#examples">Examples</a></h2>
+<h3 id="determine-theoretical-max-instructions-per-cycle"><a class="header" href="#determine-theoretical-max-instructions-per-cycle">Determine theoretical max instructions per cycle</a></h3>
+<pre><code class="language-c">#define NOP4 &quot;nop\nnop\nnop\nnop\n&quot;
+#define NOP32 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4 NOP4
+#define NOP256 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32 NOP32
+#define NOP2048 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256 NOP256
+
+int main() {
+ for (unsigned i = 0; i &lt; 2000000; ++i) {
+ asm volatile(NOP2048);
+ }
+}
+</code></pre>
+<pre><code class="language-sh">perf stat -e cycles,instructions ./noploop
+# Performance counter stats for './noploop':
+#
+# 1.031.075.940 cycles
+# 4.103.534.341 instructions # 3,98 insn per cycle
+</code></pre>
+<h3 id="caller-vs-callee-callstacks"><a class="header" href="#caller-vs-callee-callstacks">Caller vs callee callstacks</a></h3>
+<p>The following gives an example for a scenario where we have the following calls:</p>
+<ul>
+<li><code>main -&gt; do_foo() -&gt; do_work()</code></li>
+<li><code>main -&gt; do_bar() -&gt; do_work()</code></li>
+</ul>
+<pre><code class="language-sh">perf report --stdio -g graph,caller
+
+# Children Self Command Shared Object Symbols
+# ........ ........ ....... .................... .................
+#
+# 49.71% 49.66% bench bench [.] do_work
+# |
+# --49.66%--_start &lt;- callstack bottom
+# __libc_start_main
+# 0x7ff366c62ccf
+# main
+# |
+# |--25.13%--do_bar
+# | do_work &lt;- callstack top
+# |
+# --24.53%--do_foo
+# do_work
+
+perf report --stdio -g graph,callee
+
+# Children Self Command Shared Object Symbols
+# ........ ........ ....... .................... .................
+#
+# 49.71% 49.66% bench bench [.] do_work
+# |
+# ---do_work &lt;- callstack top
+# |
+# |--25.15%--do_bar
+# | main
+# | 0x7ff366c62ccf
+# | __libc_start_main
+# | _start &lt;- callstack bottom
+# |
+# --24.55%--do_foo
+# main
+# 0x7ff366c62ccf
+# __libc_start_main
+# _start &lt;- callstack bottom
+</code></pre>
+<h2 id="references"><a class="header" href="#references">References</a></h2>
+<ul>
+<li><a href="https://github.com/intel/perfmon">intel/perfmon</a> - intel PMU event database per uarch</li>
+<li><a href="https://perfmon-events.intel.com/">intel/perfmon-html</a> - an HTML-rendered version of the PMU events with search</li>
+<li><a href="https://github.com/intel/perfmon/blob/main/mapfile.csv">intel/perfmon/mapfile.csv</a> - processor family to uarch mapping</li>
+<li><a href="https://github.com/torvalds/linux/tree/master/tools/perf/pmu-events/arch/x86">linux/perf/events</a> - x86 PMU events known to perf tools</li>
+<li><a href="https://github.com/torvalds/linux/blob/master/arch/x86/events/intel/core.c">linux/arch/events</a> - x86 PMU events linux kernel</li>
+<li><a href="https://en.wikichip.org/wiki/WikiChip">wikichip</a> - computer architecture wiki</li>
+<li><a href="https://man7.org/linux/man-pages/man1/perf-list.1.html">perf-list(1)</a> - manpage</li>
+<li><a href="https://man7.org/linux/man-pages/man2/perf_event_open.2.html">perf_event_open(2)</a> - manpage</li>
+</ul>
</main>