From 824c3aa83d9acc45567421c0d08ca6eb4d718a43 Mon Sep 17 00:00:00 2001 From: Johannes Stoelp Date: Wed, 15 Nov 2023 02:36:11 +0100 Subject: post: cas, aba and ll/sc first draft --- .../2023-09-01-cas-llsc-aba/cas-ptr-generation.cc | 134 +++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc (limited to 'content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc') diff --git a/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc b/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc new file mode 100644 index 0000000..319481b --- /dev/null +++ b/content/2023-09-01-cas-llsc-aba/cas-ptr-generation.cc @@ -0,0 +1,134 @@ +#include +#include +#include + +#include +#include + +#include + +#if __x86_64 != 1 +#error "Example only for x86_64" +#endif + +// Only used because we look at the disassembly. +#define NOINLINE __attribute__((noinline)) + +// -- UTILITIES ---------------------------------------------------------------- + +template +NOINLINE bool cmpxchg16b(T* dest, T oldval, T newval) { + static_assert(sizeof(T) == 16, "Required for CMPXCHG16B"); + static_assert(alignof(T) == 16, "Required for CMPXCHG16B"); + + const auto as_u128 = [](T* ptr) { + return reinterpret_cast(ptr); + }; + // Use legacy __sync atomic builtin instead __atomic builtin because gcc will + // directly emit the 16byte compare exchange instruction instead of emitting a + // library call. This is nice when looking at the disassembly, weeh. + // + // -mcx16 + // This option enables GCC to generate "CMPXCHG16B" instructions in 64-bit + // code to implement compare-and- exchange operations on 16-byte aligned + // 128-bit objects. This is useful for atomic updates of data + // structures exceeding one machine word in size. The compiler uses this + // instruction to implement __sync Builtins. However, for __atomic Builtins + // operating on 128-bit integers, a library call is always used. + return __sync_bool_compare_and_swap(as_u128(dest), *as_u128(&oldval), + *as_u128(&newval)); +} + +// -- BLOCK STACK -------------------------------------------------------------- + +struct BlockStack { + struct Block { + Block* next{nullptr}; + unsigned mem{0}; + }; + + void push(Block* blk) { + printf("[%d] push blk->mem=%d\n", gettid(), blk->mem); + + GenerationPtr old_head, new_head; + do { + old_head = m_head; + blk->next = old_head.ptr; + new_head.ptr = blk; + new_head.gen = old_head.gen + 1; + } while (!cmpxchg16b(&m_head, old_head, new_head)); + } + + Block* pop(int s = 0) { + GenerationPtr old_head, new_head; + do { + old_head = m_head; + new_head.ptr = old_head.ptr->next; + new_head.gen = old_head.gen + 1; + + // Simulate thread being interrupted. + if (s) { + sleep(s); + } + } while (!cmpxchg16b(&m_head, old_head, new_head)); + + printf("[%d] pop blk->mem=%d\n", gettid(), old_head.ptr->mem); + return old_head.ptr; + } + +private: + struct GenerationPtr { + Block* ptr{nullptr}; + uint64_t gen{0}; + } __attribute__((aligned(16))); + + GenerationPtr m_head; +}; + +// -- MAIN --------------------------------------------------------------------- + +int main() { + BlockStack::Block B1, B2, B3; + B1.mem = 1; + B2.mem = 2; + B3.mem = 3; + + // Create free memory block list. + // S: head -> 1 -> 2 -> 3. + BlockStack S; + S.push(&B3); + S.push(&B2); + S.push(&B1); + + auto T1 = std::thread([&S]() { + // Read head=1 & next=2, then get interrupted (simulated by sleep). + // After resuming, T2 already popped 1 & 2 and pushed again 1 (ABA). + // + // The first CAS will fail even block 1 is currently at the beginning of the + // list as the generation of the OLD_ROOT does not match the generation of + // M_ROOT. The CAS loop will retry, read the new M_ROOT, and hence the new + // NEXT pointer pointing to 3 and then successfully update M_ROOT. + auto* B1 = S.pop(2); + assert(B1->mem == 1); + // Pops 3, as it should be. + auto* B3 = S.pop(); + assert(B3->mem == 3); + }); + + auto T2 = std::thread([&S]() { + // Artificial sleep, such that T1 entered pop and read head & next. + sleep(1); + + // Pop 1 & 2. + auto* B1 = S.pop(); + assert(B1->mem == 1); + auto* B2 = S.pop(); + assert(B2->mem == 2); + + // Re-insert 1. + S.push(B1); + }); + + T2.join(); + T1.join(); +} -- cgit v1.2.3