From 9bb639287cae88b32fc1b17b7a4b494340e54434 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Tue, 22 Aug 2023 23:35:54 +0200 Subject: c++: strict aliasing --- src/development/c++.md | 204 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 203 insertions(+), 1 deletion(-) diff --git a/src/development/c++.md b/src/development/c++.md index 2fc7a09..2df60c2 100644 --- a/src/development/c++.md +++ b/src/development/c++.md @@ -1,5 +1,7 @@ # c++ +openstd [cpp standards][openstd-stds]. + Source files of most examples is available [here][src-examples]. ## Type deduction @@ -12,6 +14,199 @@ auto foo = bar(); typename decltype(foo)::_; ``` +## Strict aliasing and type punning + +The `strict aliasing` rules describe via which `alias` a value can be accessed. +> Informal: an `alias` is a reference / pointer to a value. + +Accessing a value through an alias that violates the strict aliasing rules is +`undefined behavior (UB)`. + +Examples below on [godbolt](https://godbolt.org/z/TsvTY9zfj). +```cpp +int i = 0; + +// Valid aliasing (signed / unsigned type). +*reinterpret_cast<signed int*>(&i); +*reinterpret_cast<unsigned int*>(&i); + +// Valid aliasing (cv qualified type). +*reinterpret_cast<const int*>(&i); +*reinterpret_cast<volatile int*>(&i); + +// Valid aliasing (byte type). +*reinterpret_cast<char*>(&i); +*reinterpret_cast<std::byte*>(&i); + +// Invalid aliasing, dereferencing pointer is UB. +*reinterpret_cast<short*>(&i); +*reinterpret_cast<float*>(&i); +``` +> NOTE: Casting a pointer to an invalid aliasing type is not directly UB, but +> dereferencing the pointer is UB. + +```cpp +short s[2] = { 1, 2 }; + +// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter +// alignment requirements than short). +*reinterpret_cast<int*>(s); + + +// Arbitrary byte pointer. +char c[4] = { 1, 2, 3, 4 }; + +// Invalid aliasing (UB) - type punning, UB to deref ptr (int has stricter +// alignment requirements than char). +*reinterpret_cast<int*>(c); +``` + +At the time of writing, the current [c++ std draft][std-draft-aliasing] +contains the following. 
+```text +If a program attempts to access the stored value of an object through a glvalue +whose type is not **similar** (7.3.6) to one of the following types the +behavior is undefined [44] + +(11.1) the dynamic type of the object, +(11.2) a type that is the signed or unsigned type corresponding to the dynamic + type of the object, or +(11.3) a char, unsigned char, or std::byte type. + +[44]: The intent of this list is to specify those circumstances in which an + object can or cannot be aliased. +``` + +The paragraph is short but one also needs to understand the meaning of +[similar (*similar_types*)][std-draft-similar-types]. + +This paragraph is actually somewhat more explicit in the [c++17 std][std-17]. +```text +If a program attempts to access the stored value of an object through a glvalue +of other than one of the following types the behavior is undefined [63] + +(11.1) the dynamic type of the object, +(11.2) a cv-qualified version of the dynamic type of the object, +(11.3) a type similar (as defined in 7.5) to the dynamic type of the object, +(11.4) a type that is the signed or unsigned type corresponding to the dynamic + type of the object, +(11.5) a type that is the signed or unsigned type corresponding to a + cv-qualified version of the dynamic type of the object, +(11.6) an aggregate or union type that includes one of the aforementioned types + among its elements or non- static data members (including, recursively, + an element or non-static data member of a subaggregate or contained + union), +(11.7) a type that is a (possibly cv-qualified) base class type of the dynamic + type of the object, +(11.8) a char, unsigned char, or std::byte type. + +[63]: The intent of this list is to specify those circumstances in which an + object may or may not be aliased. 
+``` + +Additional references: +- [What is the Strict Aliasing Rule and Why do we care][gist-strict-aliasing] + + The article shows a small example of how the compiler may optimize using the + strict aliasing rules. + ```cpp + int alias(int* i, char* c) { + *i = 1; + *c = 'a'; // char* may alias int* + return *i; + } + + int noalias(int* i, short* s) { + *i = 1; + *s = 2; // short* does not alias int* + return *i; + } + ``` + ```x86asm + alias(int*, char*): + mov DWORD PTR [rdi] ,0x1 ; *i = 1; + mov BYTE PTR [rsi], 0x61 ; *c = 'a'; + mov eax,DWORD PTR [rdi] ; Must reload, char* can alias int*. + ret + + noalias(int*, short*): + mov DWORD PTR [rdi], 0x1 ; *i = 1; + mov WORD PTR [rsi], 0x2 ; *s = 2; + mov eax,0x1 ; Need not reload, short* cannot alias int*. + ret + ``` +- [reinterpret_cast][reinterpret-aliasing] type aliasing + > 5) Any object pointer type `T1*` can be converted to another object pointer + > type `cv T2*`. This is exactly equivalent to `static_cast<cv + > T2*>(static_cast<cv void*>(expression))` (which implies that if T2's + > alignment requirement is not stricter than T1's, the value of the pointer + > does not change and conversion of the resulting pointer back to its + > original type yields the original value). In any case, the resulting + > pointer may only be dereferenced safely if allowed by the type aliasing + > rules (see below). + + ```cpp + int I; + char* X = reinterpret_cast<char*>(&I); // Valid, char allowed to alias int. + *X = 42; + int* Y = reinterpret_cast<int*>(X); // Cast back to original type. + *Y = 1337; // safe + + char C[4]; + int* P = reinterpret_cast<int*>(C); // Cast is ok, not yet UB. + *P = 1337; // UB, violates strict aliasing / alignment rules. + // https://stackoverflow.com/questions/52492229/c-byte-array-to-int + ``` + - On `gcc` strict aliasing is enabled starting with `-O2`. 
+ ```bash + for i in {0..3} g s; do echo "-O$i $(g++ -Q --help=optimizers -O$i | grep fstrict-aliasing)"; done + -O0 -fstrict-aliasing [disabled] + -O1 -fstrict-aliasing [disabled] + -O2 -fstrict-aliasing [enabled] + -O3 -fstrict-aliasing [enabled] + -Og -fstrict-aliasing [disabled] + -Os -fstrict-aliasing [enabled] + ``` + +### `__restrict` keyword + +The `__restrict` keyword allows the programmer to tell the compiler that two +pointers will not alias each other. +```cpp +int alias(int* a, int* b) { + *a = 1; + *b = 2; + return *a; +} + +// alias(int*, int*): # @alias(int*, int*) +// mov dword ptr [rdi], 1 +// mov dword ptr [rsi], 2 +// mov eax, dword ptr [rdi] +// ret + +int noalias(int* __restrict a, int* __restrict b) { + *a = 1; + *b = 2; + return *a; +} + +// noalias(int*, int*): # @noalias(int*, int*) +// mov dword ptr [rdi], 1 +// mov dword ptr [rsi], 2 +// mov eax, 1 +// ret +``` + +However this should only be used with care and in a narrow scope, as it is easy +to violate the self-defined contract, see [godbolt](https://godbolt.org/z/e8x1af3Mh). + +### Type punning + +The correct way to do `type-punning` in c++: +1. [`std::bit_cast`][std-bitcast] (c++20) +1. 
[`std::memcpy`](https://godbolt.org/z/3PM4jGvEz) + ## Variadic templates ([parameter pack][parameter-pack]) ```cpp @@ -98,10 +293,17 @@ A more detailed description is available in the SO discussion [How does {{#include c++/fwd-perfect.cc:3:}} ``` -[gist-strict-asliasing]: https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8 [parameter-pack]: https://en.cppreference.com/w/cpp/language/parameter_pack [enable-if]: https://en.cppreference.com/w/cpp/types/enable_if [sfinae]: https://en.cppreference.com/w/cpp/language/sfinae [fwd-ref]: https://en.cppreference.com/w/cpp/language/reference#Forwarding_references [std-fwd]: https://en.cppreference.com/w/cpp/utility/forward +[std-bitcast]: https://en.cppreference.com/w/cpp/numeric/bit_cast [src-examples]: https://github.com/johannst/notes/tree/master/src/development/c%2B%2B +[reinterpret-aliasing]: https://en.cppreference.com/w/cpp/language/reinterpret_cast#Type_aliasing +[gist-strict-aliasing]: https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8 +[std-draft-aliasing]: http://eel.is/c++draft/basic.lval#11 +[std-draft-similar-types]: http://eel.is/c++draft/conv.qual#def:similar_types +[std-17]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4713.pdf +[openstd-home]: https://www.open-std.org/jtc1/sc22/wg21/ +[openstd-stds]: https://www.open-std.org/jtc1/sc22/wg21/docs/standards -- cgit v1.2.3