From f0cf514eb3ca30c5170e534c3861ad73996c7726 Mon Sep 17 00:00:00 2001 From: johannst Date: Tue, 22 Aug 2023 21:38:08 +0000 Subject: deploy: 9bb639287cae88b32fc1b17b7a4b494340e54434 --- 404.html | 2 +- arch/arm64.html | 2 +- arch/armv7.html | 2 +- arch/index.html | 2 +- arch/riscv.html | 3 +- arch/x86_64.html | 3 +- binary/index.html | 2 +- binary/nm.html | 2 +- binary/objdump.html | 2 +- binary/od.html | 2 +- binary/readelf.html | 2 +- binary/xxd.html | 2 +- css/general.css | 1 + css/print.css | 8 - css/variables.css | 10 ++ development/c++.html | 188 ++++++++++++++++++++++- development/c++filt.html | 2 +- development/gcc.html | 2 +- development/gcov.html | 6 +- development/glibc.html | 2 +- development/index.html | 3 +- development/ld.so.html | 2 +- development/make.html | 2 +- development/pgo.html | 363 ++++++++++++++++++++++++++++++++++++++++++++ development/python.html | 2 +- development/symbolver.html | 3 +- index.html | 2 +- intro.html | 2 +- linux/acl.html | 2 +- linux/coredump.html | 2 +- linux/cryptsetup.html | 2 +- linux/index.html | 6 +- linux/input.html | 12 +- linux/ptrace_scope.html | 2 +- linux/swap.html | 2 +- linux/systemd.html | 12 +- linux/zfs.html | 2 +- monitor/index.html | 2 +- monitor/lsof.html | 2 +- monitor/pgrep.html | 2 +- monitor/pidstat.html | 2 +- monitor/pmap.html | 2 +- monitor/pstack.html | 2 +- monitor/ss.html | 2 +- network/firewall-cmd.html | 2 +- network/index.html | 2 +- network/nftables.html | 2 +- network/tcpdump.html | 2 +- print.html | 354 +++++++++++++++++++++++++++++++++++++++++- searchindex.js | 2 +- searchindex.json | 2 +- tools/awk.html | 2 +- tools/bash.html | 2 +- tools/dot.html | 2 +- tools/emacs.html | 2 +- tools/fish.html | 2 +- tools/gdb.html | 2 +- tools/gdbserver.html | 2 +- tools/git.html | 4 +- tools/gpg.html | 2 +- tools/index.html | 2 +- tools/pacman.html | 2 +- tools/qemu.html | 2 +- tools/radare2.html | 2 +- tools/tmux.html | 2 +- tools/zsh.html | 2 +- trace_profile/index.html | 2 +- trace_profile/ltrace.html | 2 +- trace_profile/oprofile.html | 2 +- trace_profile/perf.html | 2 +- trace_profile/strace.html | 2 +- trace_profile/time.html | 2 +- web/chartjs.html | 2 +- web/html.html | 2 +- web/index.html | 2 +- 75 files changed, 999 insertions(+), 97 deletions(-) create mode 100644 development/pgo.html diff --git a/404.html b/404.html index 1c9d37b..aab1998 100644 --- a/404.html +++ b/404.html @@ -84,7 +84,7 @@ diff --git a/arch/arm64.html b/arch/arm64.html index 925737d..82fc9a5 100644 --- a/arch/arm64.html +++ b/arch/arm64.html @@ -83,7 +83,7 @@ diff --git a/arch/armv7.html b/arch/armv7.html index 5d4a108..2ead03f 100644 --- a/arch/armv7.html +++ b/arch/armv7.html @@ -83,7 +83,7 @@ diff --git a/arch/index.html b/arch/index.html index daa09a4..82fa063 100644 --- a/arch/index.html +++ b/arch/index.html @@ -83,7 +83,7 @@ diff --git a/arch/riscv.html b/arch/riscv.html index 7d35e9a..7d5ccec 100644 --- a/arch/riscv.html +++ b/arch/riscv.html @@ -83,7 +83,7 @@ @@ -245,6 +245,7 @@ required when compiling natively on riscv.

diff --git a/arch/x86_64.html b/arch/x86_64.html index acdc14a..3c3193c 100644 --- a/arch/x86_64.html +++ b/arch/x86_64.html @@ -83,7 +83,7 @@ @@ -439,6 +439,7 @@ Hi ASM-World!
  • GNU Assembler
  • GNU Assembler Directives
  • GNU Assembler x86_64 dependent features
  • +
  • juicebox-asm an x86_64 jit assembler playground
  • diff --git a/binary/index.html b/binary/index.html index 847f419..53212e6 100644 --- a/binary/index.html +++ b/binary/index.html @@ -83,7 +83,7 @@ diff --git a/binary/nm.html b/binary/nm.html index 4186bea..da19356 100644 --- a/binary/nm.html +++ b/binary/nm.html @@ -83,7 +83,7 @@ diff --git a/binary/objdump.html b/binary/objdump.html index 7d764e6..b6f50f0 100644 --- a/binary/objdump.html +++ b/binary/objdump.html @@ -83,7 +83,7 @@ diff --git a/binary/od.html b/binary/od.html index 16c38a1..e18032b 100644 --- a/binary/od.html +++ b/binary/od.html @@ -83,7 +83,7 @@ diff --git a/binary/readelf.html b/binary/readelf.html index e40096d..b9498e9 100644 --- a/binary/readelf.html +++ b/binary/readelf.html @@ -83,7 +83,7 @@ diff --git a/binary/xxd.html b/binary/xxd.html index 3d1f6e2..e1da033 100644 --- a/binary/xxd.html +++ b/binary/xxd.html @@ -83,7 +83,7 @@ diff --git a/css/general.css b/css/general.css index 344b53e..a6db680 100644 --- a/css/general.css +++ b/css/general.css @@ -5,6 +5,7 @@ :root { /* Browser default font-size is 16px, this way 1 rem = 10px */ font-size: 62.5%; + color-scheme: var(--color-scheme); } html { diff --git a/css/print.css b/css/print.css index 5e690f7..27d05e9 100644 --- a/css/print.css +++ b/css/print.css @@ -22,14 +22,6 @@ overflow-y: initial; } -code { - background-color: #666666; - border-radius: 5px; - - /* Force background to be printed in Chrome */ - -webkit-print-color-adjust: exact; -} - pre > .buttons { z-index: 2; } diff --git a/css/variables.css b/css/variables.css index 21bf8e5..59fcda8 100644 --- a/css/variables.css +++ b/css/variables.css @@ -50,6 +50,8 @@ --searchresults-border-color: #888; --searchresults-li-bg: #252932; --search-mark-bg: #e3b171; + + --color-scheme: dark; } .coal { @@ -90,6 +92,8 @@ --searchresults-border-color: #98a3ad; --searchresults-li-bg: #2b2b2f; --search-mark-bg: #355c7d; + + --color-scheme: dark; } .light { @@ -130,6 +134,8 @@ --searchresults-border-color: #888; --searchresults-li-bg: #e4f2fe; --search-mark-bg: #a2cff5; + + --color-scheme: light; } .navy { @@ -170,6 +176,8 @@ --searchresults-border-color: #5c5c68; --searchresults-li-bg: #242430; --search-mark-bg: #a2cff5; + + --color-scheme: dark; } .rust { @@ -210,6 +218,8 @@ --searchresults-border-color: #888; --searchresults-li-bg: #dec2a2; --search-mark-bg: #e69f67; + + --color-scheme: light; } @media (prefers-color-scheme: dark) { diff --git a/development/c++.html b/development/c++.html index 94cc7fd..9e296dd 100644 --- a/development/c++.html +++ b/development/c++.html @@ -83,7 +83,7 @@ @@ -170,6 +170,7 @@

    c++

    +

    openstd cpp standards.

    Source files of most examples is available here.

    Type deduction

    Force compile error to see what auto is deduced to.

    @@ -178,6 +179,191 @@ // force compile error typename decltype(foo)::_; +

    Strict aliasing and type punning

    +

    The strict aliasing rules describe via which alias a value can be accessed.

    +
    +

    Informal: an alias is a reference / pointer to a value.

    +
    +

    Accessing a value through an alias that violates the strict aliasing rules is +undefined behavior (UB).

    +

    Examples below on godbolt.

    +
    int i = 0;
    +
    +// Valid aliasing (signed / unsigned type).
    +*reinterpret_cast<signed int*>(&i);
    +*reinterpret_cast<unsigned int*>(&i);
    +
    +// Valid aliasing (cv qualified type).
    +*reinterpret_cast<const int*>(&i);
    +*reinterpret_cast<const unsigned*>(&i);
    +
    +// Valid aliasing (byte type).
    +*reinterpret_cast<char*>(&i);
    +*reinterpret_cast<std::byte*>(&i);
    +
    +// Invalid aliasing, dereferencing pointer is UB.
    +*reinterpret_cast<short*>(&i);
    +*reinterpret_cast<float*>(&i);
    +
    +
    +

    NOTE: Casting pointer to invalid aliasing type is not directly UB, but +dereferencing the pointer is UB.

    +
    +
    short s[2] = { 1, 2 };
    +
    +// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter
    +// alignment requirements than short).
    +*reinterpret_cast<int*>(s);
    +
    +
    +// Arbitrary byte pointer.
    +char c[4] = { 1, 2, 3, 4 };
    +
    +// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter
    +// alignment requirements than char).
    +*reinterpret_cast<int*>(c);
    +
    +

    At the time of writing, the current c++ std draft +contains the following.

    +
    If a program attempts to access the stored value of an object through a glvalue
    +whose type is not **similar** (7.3.6) to one of the following types the
    +behavior is undefined [44]
    +
    +(11.1) the dynamic type of the object,
    +(11.2) a type that is the signed or unsigned type corresponding to the dynamic
    +       type of the object, or
    +(11.3) a char, unsigned char, or std::byte type.
    +
    +[44]: The intent of this list is to specify those circumstances in which an
    +      object can or cannot be aliased.
    +
    +

    The paragraph is short but one also needs to understand the meaning of +similar (similar_types).

    +

    This paragraph is actually somewhat more explicit in the c++17 std.

    +
    If a program attempts to access the stored value of an object through a glvalue
    +of other than one of the following types the behavior is undefined [63]
    +
    +(11.1) the dynamic type of the object,
    +(11.2) a cv-qualified version of the dynamic type of the object,
    +(11.3) a type similar (as defined in 7.5) to the dynamic type of the object,
    +(11.4) a type that is the signed or unsigned type corresponding to the dynamic
    +       type of the object,
    +(11.5) a type that is the signed or unsigned type corresponding to a
    +       cv-qualified version of the dynamic type of the object,
    +(11.6) an aggregate or union type that includes one of the aforementioned types
    +       among its elements or non- static data members (including, recursively,
    +       an element or non-static data member of a subaggregate or contained
    +       union),
    +(11.7) a type that is a (possibly cv-qualified) base class type of the dynamic
    +       type of the object,
    +(11.8) a char, unsigned char, or std::byte type.
    +
    +[63]: The intent of this list is to specify those circumstances in which an
    +      object may or may not be aliased.
    +
    +

    Additional references:

    +
      +
    • +

      What is the Strict Aliasing Rule and Why do we care

      +

      The article shows a small example how the compiler may optimized using the +strict aliasing rules.

      +
      int alias(int* i, char* c) {
      +  *i = 1;
      +  *c = 'a';  // char* may alias int*
      +  return *i;
      +}
      +
      +int noalias(int* i, short* s) {
      +    *i = 1;
      +    *s = 2;  // short* does not alias int*
      +    return *i;
      +}
      +
      +
      alias(int*, char*):
      +mov    DWORD PTR [rdi] ,0x1  ; *i = 1;
      +mov    BYTE PTR [rsi], 0x61  ; *c = 'a';
      +mov    eax,DWORD PTR [rdi]   ; Must reload, char* can alias int*.
      +ret
      +
      +noalias(int*, short*):
      +mov    DWORD PTR [rdi], 0x1  ; *i = 1;
      +mov    WORD PTR [rsi], 0x2   ; *s = 2;
      +mov    eax,0x1               ; Must not reload, short* can not alias int*.
      +ret
      +
      +
    • +
    • +

      reinterpret_cast type aliasing

      +
      +
        +
      1. Any object pointer type T1* can be converted to another object pointer +type cv T2*. This is exactly equivalent to static_cast<cv T2*>(static_cast<cv void*>(expression)) (which implies that if T2's +alignment requirement is not stricter than T1's, the value of the pointer +does not change and conversion of the resulting pointer back to its +original type yields the original value). In any case, the resulting +pointer may only be dereferenced safely if allowed by the type aliasing +rules (see below).
      2. +
      +
      +
      int I;
      +char* X = reinterpret_cast<char*>(&I);  // Valid, char allowed to alias int.
      +*X = 42;
      +int* Y = reinterpret_cast<int*>(X);     // Cast back to original type.
      +*Y = 1337;  // safe
      +
      +char C[4];
      +int* P = reinterpret_cast<int*>(C);     // Cast is ok, not yet UB.
      +*P = 1337; // UB, violates strict aliasing / alignment rules.
      +           // https://stackoverflow.com/questions/52492229/c-byte-array-to-int
      +
      +
    • +
    • +

      On gcc strict aliasing is enabled starting with -O2.

      +
      for i in {0..3} g s; do echo "-O$i $(g++ -Q --help=optimizers -O$i | grep fstrict-aliasing)"; done
      +-O0   -fstrict-aliasing           [disabled]
      +-O1   -fstrict-aliasing           [disabled]
      +-O2   -fstrict-aliasing           [enabled]
      +-O3   -fstrict-aliasing           [enabled]
      +-Og   -fstrict-aliasing           [disabled]
      +-Os   -fstrict-aliasing           [enabled]
      +
      +
    • +
    +

    __restrict keyword

    +

    The __restrict keyword allows the programmer to tell the compiler that two +pointer will not alias each other.

    +
    int alias(int* a, int* b) {
    +    *a = 1;
    +    *b = 2;
    +    return *a;
    +}
    +
    +// alias(int*, int*):                           # @alias(int*, int*)
    +//         mov     dword ptr [rdi], 1
    +//         mov     dword ptr [rsi], 2
    +//         mov     eax, dword ptr [rdi]
    +//         ret
    +
    +int noalias(int* __restrict a, int* __restrict b) {
    +    *a = 1;
    +    *b = 2;
    +    return *a;
    +}
    +
    +// noalias(int*, int*):                         # @noalias(int*, int*)
    +//         mov     dword ptr [rdi], 1
    +//         mov     dword ptr [rsi], 2
    +//         mov     eax, 1
    +//         ret
    +
    +

    However this should only be used with care and in a narrow scope, as it is easy +to violate self defined contract, see godbolt.

    +

    Type punning

    +

    The correct way to do type-punning in c++:

    +
      +
    1. std::bit_cast (c++20)
    2. +
    3. std::memcpy
    4. +

    Variadic templates (parameter pack)

    #include <iostream>
     
    diff --git a/development/c++filt.html b/development/c++filt.html
    index 1468f2a..e7b7614 100644
    --- a/development/c++filt.html
    +++ b/development/c++filt.html
    @@ -83,7 +83,7 @@
     
             
    diff --git a/development/gcc.html b/development/gcc.html
    index 4490222..c8a236a 100644
    --- a/development/gcc.html
    +++ b/development/gcc.html
    @@ -83,7 +83,7 @@
     
             
    diff --git a/development/gcov.html b/development/gcov.html
    index e065a03..6103b8d 100644
    --- a/development/gcov.html
    +++ b/development/gcov.html
    @@ -83,7 +83,7 @@
     
             
    @@ -265,7 +265,7 @@ clean:
                                     
                                 
     
    -                            
     
    @@ -279,7 +279,7 @@ clean:
                             
                         
     
    -                    
                 
    diff --git a/development/glibc.html b/development/glibc.html
    index 7961bf3..2807110 100644
    --- a/development/glibc.html
    +++ b/development/glibc.html
    @@ -83,7 +83,7 @@
     
             
    diff --git a/development/index.html b/development/index.html
    index edd1fef..696e124 100644
    --- a/development/index.html
    +++ b/development/index.html
    @@ -83,7 +83,7 @@
     
             
    @@ -180,6 +180,7 @@
     
  • symbol versioning
  • python
  • gcov
  • +
  • pgo
  • diff --git a/development/ld.so.html b/development/ld.so.html index 8a88d42..110cc1c 100644 --- a/development/ld.so.html +++ b/development/ld.so.html @@ -83,7 +83,7 @@ diff --git a/development/make.html b/development/make.html index 630715d..36d174a 100644 --- a/development/make.html +++ b/development/make.html @@ -83,7 +83,7 @@ diff --git a/development/pgo.html b/development/pgo.html new file mode 100644 index 0000000..b928df6 --- /dev/null +++ b/development/pgo.html @@ -0,0 +1,363 @@ + + + + + + pgo - Notes + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + +
    +
    +

    Profile guided optimization (pgo)

    +

    pgo is an optimization technique to optimize a program for its usual +workload.

    +

    It is applied in two phases:

    +
      +
    1. Collect profiling data (best with representative benchmarks).
    2. +
    3. Optimize program based on collected profiling data.
    4. +
    +

    The following simple program is used as demonstrator.

    +
    #include <stdio.h>
    +
    +#define NOINLINE __attribute__((noinline))
    +
    +NOINLINE void foo() { puts("foo()"); }
    +NOINLINE void bar() { puts("bar()"); }
    +
    +int main(int argc, char *argv[]) {
    +  if (argc == 2) {
    +    foo();
    +  } else {
    +    bar();
    +  }
    +}
    +
    +

    clang

    +

    On the actual machine with clang 15.0.7, the following code is generated for +the main() function.

    +
    # clang -o test test.c -O3
    +
    +0000000000001160 <main>:
    +    1160:  50                   push   rax
    +    ; Jump if argc != 2.
    +    1161:  83 ff 02             cmp    edi,0x2
    +    1164:  75 09                jne    116f <main+0xf>
    +    ; foor() is on the hot path (fall-through).
    +    1166:  e8 d5 ff ff ff       call   1140 <_Z3foov>
    +    116b:  31 c0                xor    eax,eax
    +    116d:  59                   pop    rcx
    +    116e:  c3                   ret
    +    ; bar() is on the cold path (branch).
    +    116f:  e8 dc ff ff ff       call   1150 <_Z3barv>
    +    1174:  31 c0                xor    eax,eax
    +    1176:  59                   pop    rcx
    +    1177:  c3                   ret
    +
    +

    The following shows how to compile with profiling instrumentation and how to +optimize the final program with the collected profiling data (llvm +pgo).

    +

    The arguments to ./test are chosen such that 9/10 runs call bar(), which +is currently on the cold path.

    +
    # Compile test program with profiling instrumentation.
    +clang -o test test.cc -O3 -fprofile-instr-generate
    +
    +# Collect profiling data from multiple runs.
    +for i in {0..10}; do
    +    LLVM_PROFILE_FILE="prof.clang/%p.profraw" ./test $(seq 0 $i)
    +done
    +
    +# Merge raw profiling data into single profile data.
    +llvm-profdata merge -o pgo.profdata prof.clang/*.profraw
    +
    +# Optimize test program with profiling data.
    +clang -o test test.cc -O3 -fprofile-use=pgo.profdata
    +
    +
    +

    NOTE: If LLVM_PROFILE_FILE is not given the profile data is written to +default.profraw which is re-written on each run. If the LLVM_PROFILE_FILE +contains a %m in the filename, a unique integer will be generated and +consecutive runs will update the same generated profraw file, +LLVM_PROFILE_FILE can specify a new file every time, however that requires +more storage in general.

    +
    +

    After optimizing the program with the profiling data, the main() function +looks as follows.

    +
    0000000000001060 <main>:
    +    1060:  50                    push   rax
    +    ; Jump if argc == 2.
    +    1061:  83 ff 02              cmp    edi,0x2
    +    1064:  74 09                 je     106f <main+0xf>
    +    ; bar() is on the hot path (fall-through).
    +    1066:  e8 e5 ff ff ff        call   1050 <_Z3barv>
    +    106b:  31 c0                 xor    eax,eax
    +    106d:  59                    pop    rcx
    +    106e:  c3                    ret
    +    ; foo() is on the cold path (branch).
    +    106f:  e8 cc ff ff ff        call   1040 <_Z3foov>
    +    1074:  31 c0                 xor    eax,eax
    +    1076:  59                    pop    rcx
    +    1077:  c3                    ret
    +
    +

    gcc

    +

    With gcc 13.2.1 on the current machine, the optimizer puts bar() on the +hot path by default.

    +
    0000000000001040 <main>:
    +    1040:  48 83 ec 08          sub    rsp,0x8
    +    ; Jump if argc == 2.
    +    1044:  83 ff 02             cmp    edi,0x2
    +    1047:  74 0c                je     1055 <main+0x15>
    +    ; bar () is on the hot path (fall-through).
    +    1049:  e8 22 01 00 00       call   1170 <_Z3barv>
    +    104e:  31 c0                xor    eax,eax
    +    1050:  48 83 c4 08          add    rsp,0x8
    +    1054:  c3                   ret
    +    ; foo() is on the cold path (branch).
    +    1055:  e8 06 01 00 00       call   1160 <_Z3foov>
    +    105a:  eb f2                jmp    104e <main+0xe>
    +    105c:  0f 1f 40 00          nop    DWORD PTR [rax+0x0]
    +
    +
    +

    The following shows how to compile with profiling instrumentation and how to +optimize the final program with the collected profiling data.

    +

    The arguments to ./test are chosen such that 2/3 runs call foo(), which +is currently on the cold path.

    +
    gcc -o test test.cc -O3 -fprofile-generate
    +./test 1
    +./test 1
    +./test 2 2
    +gcc -o test test.cc -O3 -fprofile-use
    +
    +
    +

    NOTE: Consecutive runs update the generated test.gcda profile data file +rather than re-write it.

    +
    +

    After optimizing the program with the profiling data, the main() function

    +
    0000000000001040 <main.cold>:
    +    ; bar() is on the cold path (branch).
    +    1040:  e8 05 00 00 00       call   104a <_Z3barv>
    +    1045:  e9 25 00 00 00       jmp    106f <main+0xf>
    +
    +0000000000001060 <main>:
    +    1060:  51                   push   rcx
    +    ; Jump if argc != 2.
    +    1061:  83 ff 02             cmp    edi,0x2
    +    1064:  0f 85 d6 ff ff ff    jne    1040 <main.cold>
    +    ; for() is on the hot path (fall-through).
    +    106a:  e8 11 01 00 00       call   1180 <_Z3foov>
    +    106f:  31 c0                xor    eax,eax
    +    1071:  5a                   pop    rdx
    +    1072:  c3                   ret
    +
    + +
    + + +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + +
    + + diff --git a/development/python.html b/development/python.html index 0362177..cb9d6af 100644 --- a/development/python.html +++ b/development/python.html @@ -83,7 +83,7 @@ diff --git a/development/symbolver.html b/development/symbolver.html index a968cbd..1b82a41 100644 --- a/development/symbolver.html +++ b/development/symbolver.html @@ -83,7 +83,7 @@ @@ -372,6 +372,7 @@ func_v2
  • Binutils ld: Symbol Versioning
  • LSB: Symbol Versioning
  • How To Write Shared Libraries
  • +
  • LSB: ELF File Format
  • diff --git a/index.html b/index.html index bddfc9a..404c7a2 100644 --- a/index.html +++ b/index.html @@ -83,7 +83,7 @@ diff --git a/intro.html b/intro.html index bddfc9a..404c7a2 100644 --- a/intro.html +++ b/intro.html @@ -83,7 +83,7 @@ diff --git a/linux/acl.html b/linux/acl.html index 5a420e3..44bf5dd 100644 --- a/linux/acl.html +++ b/linux/acl.html @@ -83,7 +83,7 @@ diff --git a/linux/coredump.html b/linux/coredump.html index d9ff589..62fb21d 100644 --- a/linux/coredump.html +++ b/linux/coredump.html @@ -83,7 +83,7 @@ diff --git a/linux/cryptsetup.html b/linux/cryptsetup.html index 730ff00..35ebd61 100644 --- a/linux/cryptsetup.html +++ b/linux/cryptsetup.html @@ -83,7 +83,7 @@ diff --git a/linux/index.html b/linux/index.html index 00bf1ee..12cb51b 100644 --- a/linux/index.html +++ b/linux/index.html @@ -83,7 +83,7 @@ @@ -185,7 +185,7 @@