From 2cad8341019659a65fc6e94992165b3d7b7a37db Mon Sep 17 00:00:00 2001 From: johannst Date: Sun, 23 Mar 2025 23:51:01 +0000 Subject: deploy: 916b73bee95494c205ba67e4a50e6a525afc3a3c --- print.html | 346 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 335 insertions(+), 11 deletions(-) (limited to 'print.html') diff --git a/print.html b/print.html index 28df4d6..ef23c83 100644 --- a/print.html +++ b/print.html @@ -2829,6 +2829,7 @@ renice -n 5 -p PID
  • callgrind
  • valgrind
  • vtune
  • +
  • tracy
  • /usr/bin/time(1)

    # statistics of process run
    @@ -3276,6 +3277,231 @@ main: main.c
     vtune: main
     	$(VTUNE)/bin64/vtune -collect hotspots -start-paused -- ./main
     
    +

    tracy(1)

    +

    Tracy is a frame profiler, supporting manual code instrumentation +and providing a sampling profiler.

    +

    One can either record and visualize the profiling data live using +tracy-profiler or record the profiling data to a file using tracy-capture.

    +
    tracy-profiler [file] [-p port]
    +
    +tracy-capture -o file [-f] [-p port]
    +    -f   overwrite <file> if it exists
    +
    +

    Example

    +

    The example showcases different cases:

    +
      +
    1. Use tracy from a single binary. In that case the TracyClient.cpp can be +directly linked / included in the instrumented binary.
      +
    2. Use tracy from different binaries (eg main executable + shared library). In +this case the TracyClient.cpp should be compiled into its own shared +library, such that there is a single tracy client.
      +
    3. Use tracy from different binaries on windows. In this case the +TracyClient.cpp must be compiled again into a separate shared library, +while defining TRACY_EXPORTS. The code being instrumented must be compiled +with TRACY_IMPORTS defined.
      +
    +

    An instrumented c++ example:

    +
    #include <chrono>
    +#include <thread>
    +
    +#include <tracy/Tracy.hpp>
    +
    +#ifdef USE_FOO
    +extern "C" void foo_comp_hook(int64_t);
    +#endif
    +
    +void init() {
    +  // Create a named zone (active for the current scope).
    +  // Name will be used when rendering the zone in the thread timeline.
    +  ZoneScopedN("init()");
    +  // Set explicit color for the rendered zone.
    +  ZoneColor(0xff0000);
    +
    +  std::this_thread::sleep_for(std::chrono::seconds(1));
    +}
    +
    +void comp(const char* name) {
    +  // Track call count.
    +  static int64_t ccnt = 0;
    +  ccnt += 1;
    +
    +  // Create an unnamed zone for the current scope.
    +  ZoneScoped;
    +  // Name the zone by formatting the name dynamically.
    +  // This name is shown for the zone in the thread timeline, however
    +  // in the zone statistics they are all accounted under one common
    +  // zone "comp".
    +  ZoneNameF("comp(%s)", name);
    +  // Additional text to attach to the zone.
    +  ZoneTextF("text(%s)", name);
    +  // Additional value to attach to the zone measurement.
    +  ZoneValue(ccnt);
    +
    +  // Statistics for dynamic names, text and values can be looked at in the zone
    +  // statistics. There, measurements can be grouped by different categories.
    +
    +  // Add a simple plot.
    +  TracyPlot("comp-plot", ccnt % 4);
    +
    +  std::this_thread::sleep_for(std::chrono::milliseconds(100));
    +
    +#ifdef USE_FOO
    +  foo_comp_hook(ccnt);
    +#endif
    +}
    +
    +void post_comp() {
    +  // Create an unnamed zone for the current scope and capture callstack (max
    +  // depth 10). Capturing callstack requires platform with TRACY_HAS_CALLSTACK
    +  // support.
    +  ZoneScopedS(10);
    +  // Name the zone, w/o formatting.
    +  const char name[] = "post_comp()";
    +  ZoneName(name, sizeof(name));
    +
    +  // Add trace messages to the timeline.
    +  TracyMessageL("start sleep in post_comp()");
    +  std::this_thread::sleep_for(std::chrono::milliseconds(50));
    +  TracyMessageL("end sleep in post_comp()");
    +}
    +
    +void fini() {
    +  // Create a named zone with an explicit color.
    +  ZoneScopedNC("fini()", 0x00ff00);
    +  std::this_thread::sleep_for(std::chrono::seconds(1));
    +}
    +
    +int main() {
    +  // Create a named zone.
    +  ZoneScopedN("main()");
    +
    +  init();
    +
    +  int step = 0;
    +  while (step++ < 10) {
    +    // Create a frame message, this starts a new frame with the name
    +    // "step" and ends the previous frame with the name "step".
    +    FrameMarkNamed("step");
    +    // Create a named scope.
    +    ZoneScopedN("step()");
    +    comp("a");
    +    comp("b");
    +    comp("c");
    +    post_comp();
    +  }
    +
    +  fini();
    +}
    +
    +

    An instrumented c example:

    +
    #include <stdint.h>
    +#include <inttypes.h>
    +#include <stdio.h>
    +
    +#include <tracy/TracyC.h>
    +
    +static void comp_helper(int64_t i) {
    +  char buf[64];
    +  int cnt = snprintf(buf, sizeof(buf), "helper(%" PRId64 ")", i);
    +
    +  // Create an active unnamed zone.
    +  TracyCZone(ctx, 1);
    +
    +  // Name the zone.
    +  TracyCZoneName(ctx, buf, cnt);
    +  // Add custom text to the zone measurement.
    +  TracyCZoneText(ctx, buf, cnt);
    +  // Add custom value to the zone measurement.
    +  TracyCZoneValue(ctx, i);
    +
    +  for (int ii = 0; ii < i * 100000; ++ii) {
    +    /* fake work */
    +  }
    +
    +  // End the zone measurement.
    +  TracyCZoneEnd(ctx);
    +}
    +
    +void foo_comp_hook(int64_t cnt) {
    +  // Create an active named zone.
    +  TracyCZoneN(ctx, "foo", 1);
    +
    +  for (int i = 0; i < cnt; ++i) {
    +    // Plot value.
    +    TracyCPlot("foo_comp_hook", cnt + i);
    +
    +    comp_helper(i);
    +  }
    +
    +  // Configure plot "foo", probably best done once during initialization.
    +  TracyCPlotConfig("foo", TracyPlotFormatNumber, 1 /* step */, 1 /* fill */,
    +                   0xff0000);
    +  // Plot value.
    +  TracyCPlot("foo", cnt);
    +
    +  // End the zone measurement.
    +  TracyCZoneEnd(ctx);
    +}
    +
    +

    Raw build commands to demonstrate compiling tracy w/o cmake, in case we need +to integrate it into a different build system.

    +
    B := BUILD
    +
    +main: $(B)/main-static $(B)/main-dynamic $(B)/main-dynamic-win
    +tracy: $(B)/tracy
    +.PHONY: main tracy
    +
    +# -- TRACY STATIC ---------------------------------------------------------------
    +
    +$(B)/main-static: main.cpp | $(B)
    +	clang++ -DTRACY_ENABLE -I$(B)/tracy/public -o $@ $^ $(B)/tracy/public/TracyClient.cpp
    +
    +# -- TRACY DYNAMIC --------------------------------------------------------------
    +
    +$(B)/main-dynamic: main.cpp $(B)/foo.so $(B)/TracyClient.so | $(B)
    +	clang++ -DTRACY_ENABLE -I$(B)/tracy/public -DUSE_FOO -o $@ $^
    +
    +$(B)/foo.so: foo.c $(B)/TracyClient.so
    +	clang -DTRACY_ENABLE -I$(B)/tracy/public -fPIC -shared -o $@ $^
    +
    +$(B)/TracyClient.so: $(B)/tracy/public/TracyClient.cpp
    +	clang++ -DTRACY_ENABLE -I$(B)/tracy/public -fPIC -shared -o $@ $^
    +
    +# -- TRACY DYNAMIC WINDOWS ------------------------------------------------------
    +
    +$(B)/main-dynamic-win: main.cpp $(B)/foo.dll $(B)/TracyClient.dll
    +	@# eg run with wine
    +	zig c++ -target x86_64-windows -DTRACY_ENABLE -DTRACY_IMPORTS -DUSE_FOO -o $@ $^ -I $(B)/tracy/public
    +
    +$(B)/foo.dll: foo.c $(B)/TracyClient.dll
    +	zig c++ -target x86_64-windows -DTRACY_ENABLE -DTRACY_IMPORTS -fPIC -shared -o $@ $^ -I $(B)/tracy/public
    +
    +$(B)/TracyClient.dll: $(B)/tracy/public/TracyClient.cpp
    +	@# win libs from 'pragma comment(lib, ..)'
    +	zig c++ -target x86_64-windows -DTRACY_ENABLE -DTRACY_EXPORTS -fPIC -shared -o $@ $^ -lws2_32 -ldbghelp -ladvapi32 -luser32
    +
    +# -- TRACY ----------------------------------------------------------------------
    +
    +# Get latest tracy and build profiler.
    +$(B)/tracy: $(B)
    +	cd $(B); bash $(CURDIR)/get-tracy.sh
    +.PHONY: $(B)/tracy
    +
    +$B:
    +	mkdir -p $(B)
    +.PHONY: $(B)
    +
    +# -- CLEAN ----------------------------------------------------------------------
    +
    +clean:
    +	$(RM) $(B)/*.so $(B)/*.dll $(B)/*.pdb $(B)/*.lib $(B)/main*
    +
    +distclean:
    +	rm -rf $(B)
    +
    +
    +

    Find get-tracy.sh here.

    +

    Debug

    +

    gas

    +

    Frequently used directives

    +

    git(1)

    Working areas

    +-------------------+ --- stash -----> +-------+
    @@ -6091,7 +6367,7 @@ link time -lgcov.

    generated for a single file for example such as

    gcov <SRC FILE | OBJ FILE>
     
    -

    Example

    +

    Example

    #include <cstdio>
     
     void tell_me(int desc) {
    @@ -6997,7 +7273,7 @@ cpupower -c all frequency-info -g
     # Change frequency governor to POWERSAVE (eg).
     cpupower -c all frequency-set -g powersave
     
    -

    Example

    +

    Example

    Watch cpu frequency.

    watch -n1 "cpupower -c all frequency-info -f -m | xargs -n2 -d'\n'"
     
    @@ -7112,7 +7388,7 @@ locally and -R means that requests are issued remotely.

    drop into shell)
  • -f run ssh command in the background
  • -

    Example

    +

    Example

    # Forward requests on localhost:8080 to moose:1234 and keep ssh in foreground
     # but don't drop into a shell.
     ssh -N -L 8080:1234 moose
    @@ -8307,6 +8583,41 @@ wrmsr     // Write MSR register, effectively does MSR[ECX] <- EDX:EAX
     

    See guest64-msr.S as an example.

    +

    Some interesting MSRs

    +
      +
    • C000_0082: IA32_LSTAR target address for syscall instruction +in IA-32e (64 bit) mode.
    • +
    • C000_0100: IA32_FS_BASE storage for %fs segment base address.
    • +
    • C000_0101: IA32_GS_BASE storage for %gs segment base address.
    • +
    • C000_0102: IA32_KERNEL_GS_BASE additional register, swapgs +swaps GS_BASE and KERNEL_GS_BASE, without altering any register state. +Can be used to swap in a pointer to a kernel data structure on syscall entry, +as for example in entry_SYSCALL_64.
    • +
    +

    Current privilege level

    +

    The current privilege level can be found at any time in the last two bits of the +code segment selector cs. The following shows an example debugging an entry +and exit of a syscall in x86_64-linux.

    +
    Breakpoint 1, entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:90
    +90		swapgs
    +(gdb) info r rax rcx cs
    +rax            0x0                 0                ; syscall nr
    +rcx            0x7feb16399e56      140647666916950  ; ret addr
    +cs             0x10                16               ; cs & 0x3 -> 0 (ring0,kernel)
    +
    +(gdb) c
    +Breakpoint 2, entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:217
    +217		sysretq
    +(gdb) info r rcx cs
    +rcx            0x7feb16399e56      140647666916950  ; ret addr
    +cs             0x10                16               ; cs & 0x3 -> 0 (ring0,kernel)
    +
    +(gdb) b *$rcx
    +(gdb) s
    +Breakpoint 3, 0x00007feb16399e56 in ?? ()
    +(gdb) info r cs
    +cs             0x33                51  ; cs & 0x3 -> 3 (ring3,user)
    +

    Size directives

    Explicitly specify size of the operation.

    mov  byte ptr [rax], 0xff    // save 1 byte(s) at [rax]
    @@ -8373,6 +8684,18 @@ mov al, 0xaa
     mov cx, 0x10
     rep stosb
     
    +

    AT&T syntax for intel syntax users

    +
    mov %rax, %rbx           // mov rbx, rax
    +mov $12, %rax            // mov rax, 12
    +
    +mov (%rsp), %rax         // mov rax, [rsp]
    +mov 8(%rsp), %rax        // mov rax, [rsp + 8]
    +mov (%rsp,%rcx,4), %rax  // mov rax, [rsp + 4 * rcx]
    +mov 0x100, %rax          // mov rax, [0x100]
    +mov (0x100), %rax        // mov rax, [0x100]
    +
    +mov %gs:8, %rax          // mov rax, gs:8
    +

    Time stamp counter - rdtsc

    static inline uint64_t rdtsc() {
       uint32_t eax, edx;
    @@ -8566,34 +8889,35 @@ must save these registers in case they are used.

  • gnu assembler gas
  • intel syntax
  • -
    # file: greet.s
    +
    // file: greet.S
    +#include <asm/unistd.h>
     
         .intel_syntax noprefix
     
         .section .text, "ax", @progbits
         .global _start
     _start:
    -    mov rdi, 1                      # fd
    +    mov rdi, 1                      # fd (stdout)
         lea rsi, [rip + greeting]       # buf
         mov rdx, [rip + greeting_len]   # count
    -    mov rax, 1                      # write(2) syscall nr
    +    mov rax, __NR_write             # write(2) syscall nr
         syscall
     
         mov rdi, 0                      # exit code
    -    mov rax, 60                     # exit(2) syscall nr
    +    mov rax, __NR_exit              # exit(2) syscall nr
         syscall
     
         .section .rdonly, "a", @progbits
     greeting:
    -    .asciz "Hi ASM-World!\n"
    +    .ascii "Hi ASM-World!\n"
     greeting_len:
         .int .-greeting
     
    -

    Syscall numbers are defined in /usr/include/asm/unistd.h.

    +

    Files with .S suffix are pre-processed, while files with .s suffix are not.

    To compile and run:

    -
    > gcc -o greet greet.s -nostartfiles -nostdlib && ./greet
    +
    > gcc -o greet greet.S -nostartfiles -nostdlib && ./greet
     Hi ASM-World!
     

    MBR boot sectors example

    -- cgit v1.2.3