From f0cf514eb3ca30c5170e534c3861ad73996c7726 Mon Sep 17 00:00:00 2001 From: johannst Date: Tue, 22 Aug 2023 21:38:08 +0000 Subject: deploy: 9bb639287cae88b32fc1b17b7a4b494340e54434 --- development/c++.html | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 1 deletion(-) (limited to 'development/c++.html') diff --git a/development/c++.html b/development/c++.html index 94cc7fd..9e296dd 100644 --- a/development/c++.html +++ b/development/c++.html @@ -83,7 +83,7 @@ @@ -170,6 +170,7 @@

c++

+

openstd cpp standards.

Source files of most examples are available here.

Type deduction

Force compile error to see what auto is deduced to.

@@ -178,6 +179,191 @@ // force compile error typename decltype(foo)::_; +

Strict aliasing and type punning

+

The strict aliasing rules describe via which alias a value can be accessed.

+
+

Informal: an alias is a reference / pointer to a value.

+
+

Accessing a value through an alias that violates the strict aliasing rules is +undefined behavior (UB).

+

Examples below on godbolt.

+
int i = 0;
+
+// Valid aliasing (signed / unsigned type).
+*reinterpret_cast<signed int*>(&i);
+*reinterpret_cast<unsigned int*>(&i);
+
+// Valid aliasing (cv qualified type).
+*reinterpret_cast<const int*>(&i);
+*reinterpret_cast<const unsigned*>(&i);
+
+// Valid aliasing (byte type).
+*reinterpret_cast<char*>(&i);
+*reinterpret_cast<std::byte*>(&i);
+
+// Invalid aliasing, dereferencing pointer is UB.
+*reinterpret_cast<short*>(&i);
+*reinterpret_cast<float*>(&i);
+
+
+

NOTE: Casting a pointer to an invalid aliasing type is not directly UB, but +dereferencing the pointer is UB.

+
+
short s[2] = { 1, 2 };
+
+// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter
+// alignment requirements than short).
+*reinterpret_cast<int*>(s);
+
+
+// Arbitrary byte pointer.
+char c[4] = { 1, 2, 3, 4 };
+
+// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter
+// alignment requirements than char).
+*reinterpret_cast<int*>(c);
+
+

At the time of writing, the current c++ std draft +contains the following.

+
If a program attempts to access the stored value of an object through a glvalue
+whose type is not **similar** (7.3.6) to one of the following types the
+behavior is undefined [44]
+
+(11.1) the dynamic type of the object,
+(11.2) a type that is the signed or unsigned type corresponding to the dynamic
+       type of the object, or
+(11.3) a char, unsigned char, or std::byte type.
+
+[44]: The intent of this list is to specify those circumstances in which an
+      object can or cannot be aliased.
+
+

The paragraph is short but one also needs to understand the meaning of +similar (similar_types).

+

This paragraph is actually somewhat more explicit in the c++17 std.

+
If a program attempts to access the stored value of an object through a glvalue
+of other than one of the following types the behavior is undefined [63]
+
+(11.1) the dynamic type of the object,
+(11.2) a cv-qualified version of the dynamic type of the object,
+(11.3) a type similar (as defined in 7.5) to the dynamic type of the object,
+(11.4) a type that is the signed or unsigned type corresponding to the dynamic
+       type of the object,
+(11.5) a type that is the signed or unsigned type corresponding to a
+       cv-qualified version of the dynamic type of the object,
+(11.6) an aggregate or union type that includes one of the aforementioned types
+       among its elements or non-static data members (including, recursively,
+       an element or non-static data member of a subaggregate or contained
+       union),
+(11.7) a type that is a (possibly cv-qualified) base class type of the dynamic
+       type of the object,
+(11.8) a char, unsigned char, or std::byte type.
+
+[63]: The intent of this list is to specify those circumstances in which an
+      object may or may not be aliased.
+
+

Additional references:

+
    +
  • +

    What is the Strict Aliasing Rule and Why do we care

    +

    The article shows a small example of how the compiler may optimize using the +strict aliasing rules.

    +
    int alias(int* i, char* c) {
    +  *i = 1;
    +  *c = 'a';  // char* may alias int*
    +  return *i;
    +}
    +
    +int noalias(int* i, short* s) {
    +    *i = 1;
    +    *s = 2;  // short* does not alias int*
    +    return *i;
    +}
    +
    +
    alias(int*, char*):
    +mov    DWORD PTR [rdi] ,0x1  ; *i = 1;
    +mov    BYTE PTR [rsi], 0x61  ; *c = 'a';
    +mov    eax,DWORD PTR [rdi]   ; Must reload, char* can alias int*.
    +ret
    +
    +noalias(int*, short*):
    +mov    DWORD PTR [rdi], 0x1  ; *i = 1;
    +mov    WORD PTR [rsi], 0x2   ; *s = 2;
    +mov    eax,0x1               ; No reload needed, short* cannot alias int*.
    +ret
    +
    +
  • +
  • +

    reinterpret_cast type aliasing

    +
    +
      +
    1. Any object pointer type T1* can be converted to another object pointer +type cv T2*. This is exactly equivalent to static_cast<cv T2*>(static_cast<cv void*>(expression)) (which implies that if T2's +alignment requirement is not stricter than T1's, the value of the pointer +does not change and conversion of the resulting pointer back to its +original type yields the original value). In any case, the resulting +pointer may only be dereferenced safely if allowed by the type aliasing +rules (see below).
    2. +
    +
    +
    int I;
    +char* X = reinterpret_cast<char*>(&I);  // Valid, char allowed to alias int.
    +*X = 42;
    +int* Y = reinterpret_cast<int*>(X);     // Cast back to original type.
    +*Y = 1337;  // safe
    +
    +char C[4];
    +int* P = reinterpret_cast<int*>(C);     // Cast is ok, not yet UB.
    +*P = 1337; // UB, violates strict aliasing / alignment rules.
    +           // https://stackoverflow.com/questions/52492229/c-byte-array-to-int
    +
    +
  • +
  • +

    On gcc strict aliasing is enabled starting with -O2.

    +
    for i in {0..3} g s; do echo "-O$i $(g++ -Q --help=optimizers -O$i | grep fstrict-aliasing)"; done
    +-O0   -fstrict-aliasing           [disabled]
    +-O1   -fstrict-aliasing           [disabled]
    +-O2   -fstrict-aliasing           [enabled]
    +-O3   -fstrict-aliasing           [enabled]
    +-Og   -fstrict-aliasing           [disabled]
    +-Os   -fstrict-aliasing           [enabled]
    +
    +
  • +
+

__restrict keyword

+

The __restrict keyword allows the programmer to tell the compiler that two +pointers will not alias each other.

+
int alias(int* a, int* b) {
+    *a = 1;
+    *b = 2;
+    return *a;
+}
+
+// alias(int*, int*):                           # @alias(int*, int*)
+//         mov     dword ptr [rdi], 1
+//         mov     dword ptr [rsi], 2
+//         mov     eax, dword ptr [rdi]
+//         ret
+
+int noalias(int* __restrict a, int* __restrict b) {
+    *a = 1;
+    *b = 2;
+    return *a;
+}
+
+// noalias(int*, int*):                         # @noalias(int*, int*)
+//         mov     dword ptr [rdi], 1
+//         mov     dword ptr [rsi], 2
+//         mov     eax, 1
+//         ret
+
+

However, this should only be used with care and in a narrow scope, as it is easy +to violate the self-defined contract, see godbolt.

+

Type punning

+

The correct way to do type-punning in c++:

+
    +
  1. std::bit_cast (c++20)
  2. +
  3. std::memcpy
  4. +

Variadic templates (parameter pack)

#include <iostream>
 
-- 
cgit v1.2.3