aboutsummaryrefslogtreecommitdiffhomepage
path: root/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc
diff options
context:
space:
mode:
Diffstat (limited to 'content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc')
-rw-r--r--content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc134
1 files changed, 134 insertions, 0 deletions
diff --git a/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc b/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc
new file mode 100644
index 0000000..319481b
--- /dev/null
+++ b/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc
@@ -0,0 +1,134 @@
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+
+#include <atomic>
+#include <thread>
+
+#include <unistd.h>
+
+#if __x86_64 != 1
+#error "Example only for x86_64"
+#endif
+
+// Only used because we look at the disassembly.
+#define NOINLINE __attribute__((noinline))
+
+// -- UTILITIES ----------------------------------------------------------------
+
+template <typename T>
+NOINLINE bool cmpxchg16b(T* dest, T oldval, T newval) {
+ static_assert(sizeof(T) == 16, "Required for CMPXCHG16B");
+ static_assert(alignof(T) == 16, "Required for CMPXCHG16B");
+
+ const auto as_u128 = [](T* ptr) {
+ return reinterpret_cast<unsigned __int128*>(ptr);
+ };
+ // Use legacy __sync atomic builtin instead __atomic builtin because gcc will
+ // directly emit the 16byte compare exchange instruction instead of emitting a
+ // library call. This is nice when looking at the disassembly, weeh.
+ //
+ // -mcx16
+ // This option enables GCC to generate "CMPXCHG16B" instructions in 64-bit
+ // code to implement compare-and- exchange operations on 16-byte aligned
+ // 128-bit objects. This is useful for atomic updates of data
+ // structures exceeding one machine word in size. The compiler uses this
+ // instruction to implement __sync Builtins. However, for __atomic Builtins
+ // operating on 128-bit integers, a library call is always used.
+ return __sync_bool_compare_and_swap(as_u128(dest), *as_u128(&oldval),
+ *as_u128(&newval));
+}
+
+// -- BLOCK STACK --------------------------------------------------------------
+
+struct BlockStack {
+ struct Block {
+ Block* next{nullptr};
+ unsigned mem{0};
+ };
+
+ void push(Block* blk) {
+ printf("[%d] push blk->mem=%d\n", gettid(), blk->mem);
+
+ GenerationPtr old_head, new_head;
+ do {
+ old_head = m_head;
+ blk->next = old_head.ptr;
+ new_head.ptr = blk;
+ new_head.gen = old_head.gen + 1;
+ } while (!cmpxchg16b(&m_head, old_head, new_head));
+ }
+
+ Block* pop(int s = 0) {
+ GenerationPtr old_head, new_head;
+ do {
+ old_head = m_head;
+ new_head.ptr = old_head.ptr->next;
+ new_head.gen = old_head.gen + 1;
+
+ // Simulate thread being interrupted.
+ if (s) {
+ sleep(s);
+ }
+ } while (!cmpxchg16b(&m_head, old_head, new_head));
+
+ printf("[%d] pop blk->mem=%d\n", gettid(), old_head.ptr->mem);
+ return old_head.ptr;
+ }
+
+private:
+ struct GenerationPtr {
+ Block* ptr{nullptr};
+ uint64_t gen{0};
+ } __attribute__((aligned(16)));
+
+ GenerationPtr m_head;
+};
+
+// -- MAIN ---------------------------------------------------------------------
+
+int main() {
+ BlockStack::Block B1, B2, B3;
+ B1.mem = 1;
+ B2.mem = 2;
+ B3.mem = 3;
+
+ // Create free memory block list.
+ // S: head -> 1 -> 2 -> 3.
+ BlockStack S;
+ S.push(&B3);
+ S.push(&B2);
+ S.push(&B1);
+
+ auto T1 = std::thread([&S]() {
+ // Read head=1 & next=2, then get interrupted (simulated by sleep).
+ // After resuming, T2 already popped 1 & 2 and pushed again 1 (ABA).
+ //
+ // The first CAS will fail even block 1 is currently at the beginning of the
+ // list as the generation of the OLD_ROOT does not match the generation of
+ // M_ROOT. The CAS loop will retry, read the new M_ROOT, and hence the new
+ // NEXT pointer pointing to 3 and then successfully update M_ROOT.
+ auto* B1 = S.pop(2);
+ assert(B1->mem == 1);
+ // Pops 3, as it should be.
+ auto* B3 = S.pop();
+ assert(B3->mem == 3);
+ });
+
+ auto T2 = std::thread([&S]() {
+ // Artificial sleep, such that T1 entered pop and read head & next.
+ sleep(1);
+
+ // Pop 1 & 2.
+ auto* B1 = S.pop();
+ assert(B1->mem == 1);
+ auto* B2 = S.pop();
+ assert(B2->mem == 2);
+
+ // Re-insert 1.
+ S.push(B1);
+ });
+
+ T2.join();
+ T1.join();
+}