From b6627f53bf459334446bb1e2c51728a764c8651d Mon Sep 17 00:00:00 2001
From: johannst Vtune offers different analysis. Run The following shows some common flows with the Some common options are the following. Vtune offers an API to resume and pause the profile collection from within
+the profilee itself. This can be helpful if either only a certain phase should
+be profiled or some phase should be skipped. The following gives an example where only one phase in the program is profiled.
+The program makes calls to the vtune API to resume and pause the collection,
+while vtune is invoked with The makefile gives an example how to build and profile the application. While debugging can be helpful to just pre-process files. Give the compiler a hint which branch is hot, so it can lay out the code
@@ -8231,6 +8304,9 @@ popfd // pop flags (4byte) from stack
See guest64-msr.S as an example. Explicitly specify size of the operation./usr/bin/time(1)
+# statistics of process run
@@ -3222,6 +3223,59 @@ int main() {
--gen-suppressions=yes Generate suppressions file from the run.
--suppressions=FILE Load suppressions file.
vtune(1)
+vtune -collect help
to list the
+available analysis.Profiling
+hotspot
analysis
+as an example.
+# Launch and profile process.
+vtune -collect hotspots [opts] -- target [args]
+
+# Attach and profile running process.
+vtune -collect hotspots [opts] -target-pid <pid>
+
+-r <dir> output directory for the profile
+-no-follow-child don't attach to child processes (default is to follow)
+-start-paused start with paused profiling
+
Analyze
+
+vtune-gui <dir>
+
Programmatically control sampling
+-start-paused
to pause profiling initially.
+#include <ittnotify.h>
+
+void init();
+void compute();
+void shutdown();
+
+int main() {
+ init();
+
+ __itt_resume();
+ compute();
+ __itt_pause();
+
+ shutdown();
+ return 0;
+}
+
VTUNE ?= /opt/intel/oneapi/vtune/latest
+
+main: main.c
+ gcc -o $@ $^ -I$(VTUNE)/include -L$(VTUNE)/lib64 -littnotify
+
+vtune: main
+ $(VTUNE)/bin64/vtune -collect hotspots -start-paused -- ./main
+
Debug
gcc(1)
CLI
+
+
-v
verbose, outputs exact compiler/linker invocations made by the gcc driver-###
dry-run, outputting exact compiler/linker invocations-print-multi-lib
print available multilib configurations--help=<class>
print description of cmdline options for given class, eg
+warnings
, optimizers
, target
, c
, c++
-Wl,<opt>
additional option passed to the linker invocation (can
+be specified multiple times)-Wl,--trace
trace each file the linker touchesPreprocessing
gcc -E [-dM] ...
@@ -4874,10 +4938,6 @@ LD_PRELOAD=./libmtrace.so <binary>
-E
run only preprocessor-dM
list only #define
statements-###
dry-run, outputting exact compiler/linker invocations-print-multi-lib
print available multilib configurations--help=<class>
print description of cmdline options for given class, eg
-warnings
, optimizers
, target
, c
, c++
Target options
+# List all target options with their description.
@@ -4895,6 +4955,19 @@ gcc --help=optimizers
# Prepend --help with `-Q` to print whether options are enabled or disabled
# instead of showing their description.
Sanitizer
+# Enable address sanitizer, a memory error checker (out of bounds, use after free, ..).
+gcc -fsanitize=address ...
+
+# Enable leak sanitizer, a memory leak detector.
+gcc -fsanitize=leak
+
+# Enable undefined behavior sanitizer, detects various UBs (integer overflow, ..).
+gcc -fsanitize=undefined ...
+
+# Enable thread sanitizer, a data race detector.
+gcc -fsanitize=thread
+
Builtins
__builtin_expect(expr, cond)
+rdmsr // Read MSR register, effectively does EDX:EAX <- MSR[ECX]
wrmsr // Write MSR register, effectively does MSR[ECX] <- EDX:EAX
+
Size directives
mov byte ptr [rax], 0xff // save 1 byte(s) at [rax]
@@ -8316,6 +8392,13 @@ core changes frequency. This is the architectural behavior moving forward.
grep constant_tsc /proc/cpuinfo
+cpuid
cpuid // in: eax leaf; ecx sub-leaf
+ // out: eax, ebx, ecx, edx (interpreting depends on leaf)
+
This instruction is used to query for availability of certain +instructions or hardware details like cache sizes and so on.
An example of how to read cpuid leaves is shown in cpuid.c.
# Segment Selector (cs, ds, es, ss, fs, gs).
[15:3] I Descriptor Index
- [2:1] TI Table Indicator (0=GTD | 1=LDT)
- [0] RPL Requested Privilege Level
+ [2] TI Table Indicator (0=GDT | 1=LDT)
+ [1:0] RPL Requested Privilege Level
# Segment Descriptor (2 x 4 byte words).
@@ -8693,6 +8776,15 @@ itself.
[47:16] Base address of GDT table.
[15:0] Length of GDT table.
++In 64-bit mode the
+{cs, ds, es, ss}
segment registers have no +effect, segmentation is effectively disabled. The {gs, fs}
segment +registers however can still be used for segmented memory access in +64-bit mode with paging enabled. Segmentation takes place before VA -> PA +address translation. The example in seg.c shows how to set the
+gs
base +address and to do relative accesses.